{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Python机器学习Kaggle案例实战（第21期） 第3课书面作业\n",
    "学号：113778\n",
    "1. 尝试对本周数据集进行一些有趣的数据探索\n",
    "2. 安装keras，并使用keras解决上周的婚外情预测问题\n",
    "\n",
    "## 第1题：尝试对本周数据集进行一些有趣的数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T12:57:27.793289Z",
     "iopub.status.busy": "2021-07-24T12:57:27.792944Z",
     "iopub.status.idle": "2021-07-24T12:57:28.783137Z",
     "shell.execute_reply": "2021-07-24T12:57:28.782374Z",
     "shell.execute_reply.started": "2021-07-24T12:57:27.793258Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1)加载数据集，先看一下性别年龄依据以及手机品牌数据集。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:11:42.362436Z",
     "iopub.status.busy": "2021-07-24T13:11:42.362066Z",
     "iopub.status.idle": "2021-07-24T13:11:42.372178Z",
     "shell.execute_reply": "2021-07-24T13:11:42.370954Z",
     "shell.execute_reply.started": "2021-07-24T13:11:42.362400Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/kaggle/input/gender_age_test.csv.zip\n",
      "/kaggle/input/app_labels.csv.zip\n",
      "/kaggle/input/sample_submission.csv.zip\n",
      "/kaggle/input/phone_brand_device_model.csv.zip\n",
      "/kaggle/input/events.csv.zip\n",
      "/kaggle/input/app_events.csv.zip\n",
      "/kaggle/input/label_categories.csv.zip\n",
      "/kaggle/input/gender_age_train.csv.zip\n"
     ]
    }
   ],
   "source": [
    "import numpy as np # linear algebra\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "\n",
    "# Input data files are available in the read-only \"../input/\" directory\n",
    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
    "\n",
    "import os\n",
    "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
    "    for filename in filenames:\n",
    "        print(os.path.join(dirname, filename))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:12:48.058303Z",
     "iopub.status.busy": "2021-07-24T13:12:48.057922Z",
     "iopub.status.idle": "2021-07-24T13:12:48.509194Z",
     "shell.execute_reply": "2021-07-24T13:12:48.508156Z",
     "shell.execute_reply.started": "2021-07-24T13:12:48.058267Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "gatrain = pd.read_csv('/kaggle/input/gender_age_train.csv.zip')\n",
    "gatest = pd.read_csv('/kaggle/input/gender_age_test.csv.zip')\n",
    "phone = pd.read_csv('/kaggle/input/phone_brand_device_model.csv.zip',encoding='utf-8')\n",
    "#删除手机数据集中重复情况\n",
    "phone = phone.drop_duplicates('device_id', keep='first')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2)查看一下性别、年龄分布情况"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:13:06.439117Z",
     "iopub.status.busy": "2021-07-24T13:13:06.438782Z",
     "iopub.status.idle": "2021-07-24T13:13:06.453628Z",
     "shell.execute_reply": "2021-07-24T13:13:06.452570Z",
     "shell.execute_reply.started": "2021-07-24T13:13:06.439087Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "男女比例情况：\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "M    0.641758\n",
       "F    0.358242\n",
       "Name: gender, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"男女比例情况：\")\n",
    "gatrain.gender.value_counts()/len(gatrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:13:17.832675Z",
     "iopub.status.busy": "2021-07-24T13:13:17.832355Z",
     "iopub.status.idle": "2021-07-24T13:13:18.000493Z",
     "shell.execute_reply": "2021-07-24T13:13:17.999301Z",
     "shell.execute_reply.started": "2021-07-24T13:13:17.832645Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD6CAYAAABDPiuvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAOHElEQVR4nO3cf6zV9X3H8eerUJ3ZZsF6xwwXh0tJFmoyqzdI0/2xaYoXXYrJWqNbBjFE/lCTLlmy4f4h1ZpolsyNzJqRSYRmk5q2RlJxjKDNsj9QrtWp6Ax3VgNEhRbUdaYa7Ht/nA/Lkd7LPSCcc8d9PpKT8/m+P5/v975PQnjd749zU1VIkma2Tw26AUnS4BkGkiTDQJJkGEiSMAwkSRgGkiR6DIMkryd5McnzScZa7YIkO5Lsbe9zWz1J1icZT/JCksu7jrOqrd+bZFVX/Yp2/PG2b073B5UkTe5kzgz+oKouq6qRtr0W2FlVi4CdbRtgObCovdYAD0AnPIB1wJXAEmDdsQBpa27p2m/0lD+RJOmkfZLLRCuATW28Cbi+q765OnYBc5JcBFwD7Kiqw1V1BNgBjLa586tqV3W+Abe561iSpD6Y3eO6Av41SQH/UFUbgHlV9WabfwuY18bzgX1d++5vtRPV909QP6ELL7ywFi5c2GP7kqRnn332J1U1NNFcr2Hwe1V1IMlvADuS/Gf3ZFVVC4ozKskaOpeeuPjiixkbGzvTP1KSzhpJ3phsrqfLRFV1oL0fBB6lc83/7XaJh/Z+sC0/ACzo2n241U5UH56gPlEfG6pqpKpGhoYmDDdJ0imYMgyS/GqSXz82BpYBLwFbgWNPBK0CHmvjrcDK9lTRUuDddjlpO7Asydx243gZsL3NvZdkaXuKaGXXsSRJfdDLZaJ5wKPtac/ZwD9X1b8k2Q08kmQ18AZwQ1u/DbgWGAfeB24GqKrDSe4Cdrd1d1bV4Ta+FXgIOA94or0kSX2S/69/wnpkZKS8ZyBJvUvybNfXAz7GbyBLkgwDSZJhIEnCMJAk0fuXznQKFq59fNAtnFVev+e6QbcgnbU8M5AkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkcRJhkGRWkueS/KBtX5Lk6STjSb6T5JxWP7dtj7f5hV3HuKPVX01yTVd9tNXGk6w9jZ9PktSDkzkz+DrwStf2vcB9VfU54AiwutVXA0da/b62jiSLgRuBzwOjwLdawMwC7geWA4uBm9paSVKf9BQGSYaB64B/bNsBrgK+25ZsAq5v4xVtmzZ/dVu/AthSVR9U1Y+BcWBJe41X1WtV9SGwpa2VJPVJr2cGfwv8BfCLtv1Z4J2qOtq29wPz23g+sA+gzb/b1v9f/bh9JqtLkvpkyjBI8ofAwap6tg/9TNXLmiRjScYOHTo06HYk6azRy5nBl4CvJHmdziWcq4C/A+Ykmd3WDAMH2vgAsACgzX8G+Gl3/bh9Jqv/kqraUFUjVTUyNDTUQ+uSpF5MGQZVdUdVDVfVQjo3gJ+sqj8BngK+2patAh5r461tmzb/ZFVVq9/Ynja6BFgEPAPsBha1p5POaT9j62n5dJKknsyeesmk/hLYkuSbwHPAg63+IPDtJOPAYTr/uVNVe5I8ArwMHAVuq6qPAJLcDmwHZgEbq2rPJ+hLknSSTioMquqHwA/b+DU6TwIdv+bnwNcm2f9u4O4J6tuAbSfTiyTp9PEbyJIkw0CSZBhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaB
JAnDQJKEYSBJwjCQJGEYSJLoIQyS/EqSZ5L8R5I9Sb7R6pckeTrJeJLvJDmn1c9t2+NtfmHXse5o9VeTXNNVH2218SRrz8DnlCSdQC9nBh8AV1XV7wKXAaNJlgL3AvdV1eeAI8Dqtn41cKTV72vrSLIYuBH4PDAKfCvJrCSzgPuB5cBi4Ka2VpLUJ1OGQXX8rG1+ur0KuAr4bqtvAq5v4xVtmzZ/dZK0+paq+qCqfgyMA0vaa7yqXquqD4Etba0kqU96umfQfoN/HjgI7AD+C3inqo62JfuB+W08H9gH0ObfBT7bXT9un8nqE/WxJslYkrFDhw710rokqQc9hUFVfVRVlwHDdH6T/50z2dQJ+thQVSNVNTI0NDSIFiTprHRSTxNV1TvAU8AXgTlJZrepYeBAGx8AFgC0+c8AP+2uH7fPZHVJUp/08jTRUJI5bXwe8GXgFTqh8NW2bBXwWBtvbdu0+Serqlr9xva00SXAIuAZYDewqD2ddA6dm8xbT8NnkyT1aPbUS7gI2NSe+vkU8EhV/SDJy8CWJN8EngMebOsfBL6dZBw4TOc/d6pqT5JHgJeBo8BtVfURQJLbge3ALGBjVe05bZ9QkjSlKcOgql4AvjBB/TU69w+Or/8c+Nokx7obuHuC+jZgWw/9SpLOAL+BLEkyDCRJhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkevtzFJLOQgvXPj7oFs4qr99z3aBb+EQ8M5AkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiR6CIMkC5I8leTlJHuSfL3VL0iyI8ne9j631ZNkfZLxJC8kubzrWKva+r1JVnXVr0jyYttnfZKciQ8rSZpYL2cGR4E/r6rFwFLgtiSLgbXAzqpaBOxs2wDLgUXttQZ4ADrhAawDrgSWAOuOBUhbc0vXfqOf/KNJkno1ZRhU1ZtV9aM2/m/gFWA+sALY1JZtAq5v4xXA5urYBcxJchFwDbCjqg5X1RFgBzDa5s6vql1VVcDmrmNJkvrgpO4ZJFkIfAF4GphXVW+2qbeAeW08H9jXtdv+VjtRff8EdUlSn/QcBkl+Dfge8GdV9V73XPuNvk5zbxP1sCbJWJKxQ4cOnekfJ0kzRk9hkOTTdILgn6rq+638drvEQ3s/2OoHgAVduw+32onqwxPUf0lVbaiqkaoaGRoa6qV1SVIPenmaKMCDwCtV9TddU1uBY08ErQIe66qvbE8VLQXebZeTtgPLksxtN46XAdvb3HtJlraftbLrWJKkPpjdw5ovAX8KvJjk+Vb7K+Ae4JEkq4E3gBva3DbgWmAceB+4GaCqDie5C9jd1t1ZVYfb+FbgIeA84In2kiT1yZRhUFX/Dkz23P/VE6wv4LZJjrUR2DhBfQy4dKpeJElnht9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkiR7CIMnGJAeTvNRVuyDJjiR72/vcVk+S9UnGk7yQ5PKufVa19XuTrOqqX5HkxbbP+iQ53R9SknRivZwZPASMHldbC+ysqkXAzrYNsBxY1F5rgAegEx7AOuBKYAmw7liAtDW3dO13/M+SJJ1hU4ZBVf0bcPi48gpgUxtvAq7vqm+ujl3AnCQXAdcAO6rqcFUdAXYAo23u/KraVVUFbO46liSpT071nsG8qnqzjd8C5rXxfGBf17r9rXai+v4J6pKkPvrEN5Dbb/R1GnqZUpI1ScaSjB06dKgfP1KSZoRTDYO32yUe2vvBVj8ALOhaN9xqJ6oPT1CfUFVtqKqRqhoZ
Gho6xdYlScc71TDYChx7ImgV8FhXfWV7qmgp8G67nLQdWJZkbrtxvAzY3ubeS7K0PUW0sutYkqQ+mT3VgiQPA78PXJhkP52ngu4BHkmyGngDuKEt3wZcC4wD7wM3A1TV4SR3Abvbujur6thN6VvpPLF0HvBEe0mS+mjKMKiqmyaZunqCtQXcNslxNgIbJ6iPAZdO1Yck6czxG8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCQxjcIgyWiSV5OMJ1k76H4kaSaZFmGQZBZwP7AcWAzclGTxYLuSpJljWoQBsAQYr6rXqupDYAuwYsA9SdKMMV3CYD6wr2t7f6tJkvpg9qAbOBlJ1gBr2ubPkrw6yH7OIhcCPxl0E1PJvYPuQAPiv8/T57cmm5guYXAAWNC1PdxqH1NVG4AN/WpqpkgyVlUjg+5Dmoj/Pvtjulwm2g0sSnJJknOAG4GtA+5JkmaMaXFmUFVHk9wObAdmARuras+A25KkGWNahAFAVW0Dtg26jxnKS2+azvz32QepqkH3IEkasOlyz0CSNECGgSTJMJAkGQaSppEkFw+6h5nKG8gzTJITfn+jqr7Sr16k4yX5UVVd3sbfq6o/GnRPM8W0ebRUffNFOn8H6mHgaSCDbUf6mO5/j789sC5mIMNg5vlN4MvATcAfA48DD/slP00TNclYZ5iXiWawJOfSCYW/Br5RVX8/4JY0wyX5CPgfOmcI5wHvH5sCqqrOH1RvZzvPDGagFgLX0QmChcB64NFB9iQBVNWsQfcwU3lmMMMk2QxcSudPf2ypqpcG3JKkacAwmGGS/ILOaTh8/Jqsp+HSDGYYSJL80pkkyTCQJGEYSJIwDCRJGAaSJOB/Ae0p5eH5+8yUAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "gatrain.gender.value_counts().plot(kind='bar')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看出男性比女性要多出近一倍。下面看一下按性别年龄分组分布情况："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:14:34.096176Z",
     "iopub.status.busy": "2021-07-24T13:14:34.095814Z",
     "iopub.status.idle": "2021-07-24T13:14:34.113034Z",
     "shell.execute_reply": "2021-07-24T13:14:34.112148Z",
     "shell.execute_reply.started": "2021-07-24T13:14:34.096142Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "按组分布比例情况：\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "F27-28    0.041771\n",
       "F24-26    0.056132\n",
       "F43+      0.056186\n",
       "F29-32    0.062000\n",
       "F23-      0.067654\n",
       "M27-28    0.072945\n",
       "F33-42    0.074499\n",
       "M29-31    0.097917\n",
       "M22-      0.100315\n",
       "M39+      0.114957\n",
       "M32-38    0.126948\n",
       "M23-26    0.128676\n",
       "Name: group, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"按组分布比例情况：\")\n",
    "gatrain.group.value_counts().sort_values()/len(gatrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:15:13.743937Z",
     "iopub.status.busy": "2021-07-24T13:15:13.743593Z",
     "iopub.status.idle": "2021-07-24T13:15:13.928878Z",
     "shell.execute_reply": "2021-07-24T13:15:13.927999Z",
     "shell.execute_reply.started": "2021-07-24T13:15:13.743905Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEWCAYAAACEz/viAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcVUlEQVR4nO3de7glVXnn8e9PGlBEbtIBQqONhhgxeCEdxIdMEiUBRGOTRI2axFZJyEWjuYs6GeItokk0ODOaIUKCxkgUDTCBgD0IOibDpaGRi6i0CNL9gLbhogmJpuGdP2od2ZycBvqcqjrdu7+f59nPqVpVu96qvffZb9WqtdZOVSFJ2r49YrF3QJK0+EwGkiSTgSTJZCBJwmQgScJkIEniYSSDJKcn+XqS6ybK9kqyOsmN7e+erTxJ3ptkXZJrkhw68ZxVbf0bk6yaKP+hJNe257w3Sfo+SEnSg3s4VwZ/BRwzq+xE4KKqOgi4qM0DPBc4qD1OAN4PXfIATgKeCRwGnDSTQNo6vzzxvNmxJEkDW/JQK1TVZ5Isn1W8EvjxNn0GcAnw+lb+wep6sl2aZI8k+7V1V1fVHQBJVgPHJLkE2K2qLm3lHwSOA/7hofZr7733ruXLZ++WJGlzrrzyym9U1dK5lj1kMtiMfarqtjZ9O7BPm94fuHVivfWt7MHK189R/pCWL1/OmjVrtnzPJWk7leSWzS1b8A3kdhUwypgWSU5IsibJmo0bN44RUpK2C/NNBl9r1T+0v19v5RuAAybWW9bKHqx82Rzlc6qqU6tqRVWtWLp0zisdSdI8zDcZnAvMtAhaBZwzUf7y1qrocODuVp10IXBUkj3bjeOjgAvbsm8mOby1Inr5xLYkSSN5yHsGST5CdwN47yTr6VoFnQx8NMnxwC3Ai9vq5wPHAuuAe4BXAlTVHUneClzR1nvLzM1k4NfpWiw9iu7G8UPePJYk9Svb6hDWK1asKG8gS9LDl+TKqlox1zJ7IEuSTAaSJJOBJIn5dzrbqi0/8bx5Pe/mk5/X855I0rbBKwNJkslAkjSl1URjskpK0jTwykCSZDKQJJkMJEmYDCRJmAwkSZgMJEmYDCRJmAwkSZgMJEmYDCRJmAwkSZgMJEmYDCRJmAwkSZgMJEmYDCRJmAwkSZgMJEn4s5fbHH9mU9IQvDKQJJkMJEkmA0kSJgNJEiYDSRImA0kSJgNJEiYDSRILTAZJfivJ9UmuS/KRJI9McmCSy5KsS/K3SXZq6+7c5te15csntvOGVv7FJEcv8JgkSVto3skgyf7Aa4EVVfWDwA7AS4B3Au+pqu8D7gSOb085Hrizlb+nrUeSg9vzngIcA7wvyQ7z3S9J0pZbaDXREuBRSZYAuwC3Ac8BzmrLzwCOa9Mr2zxt+ZFJ0srPrKpvV9VXgHXAYQvcL0nSFph3MqiqDcCfAF+lSwJ3A1cCd1XVprbaemD/Nr0/cGt77qa2/mMny+d4jiRpBAupJtqT7qz+QOB7gUfTVfMMJskJSdYkWbNx48YhQ0nSdmUh1UQ/AXylqjZW1X8AnwCOAPZo1UYAy4ANbXoDcABAW7478M+T5XM85wGq6tSqWlFVK5YuXbqAXZckTVpIMvgqcHiSXVrd/5HA54GLgRe2dVYB57Tpc9s8bfmnqqpa+Utaa6MDgYOAyxewX5KkLTTv3zOoqsuSnAVcBWwC1gKnAucBZyZ5Wys7rT3lNOBDSdYBd9C1IKKqrk/yUbpEsgl4dVXdO9/9kiRtuQX9uE1VnQScNKv4JuZoDVRV/w68aDPbeTvw9oXsiyRp/uyBLEkyGUiSTAaSJEwGkiRMBpIkTAaSJBbYtFTTb/mJ583reTef/Lye90TSkLwykCSZDCRJJgNJEt4z0FbGexTS4vDKQJJkMpAkmQwkSZgMJEmYDCRJmAwkSZgMJEmYDCRJmAwkSZgMJEmYDCRJmAwkSZgMJEmYDCRJmAwkSZgMJEmYDCRJmAwk
SZgMJEmYDCRJmAwkSZgMJEmYDCRJLDAZJNkjyVlJvpDkhiTPSrJXktVJbmx/92zrJsl7k6xLck2SQye2s6qtf2OSVQs9KEnSllnolcEpwAVV9QPA04AbgBOBi6rqIOCiNg/wXOCg9jgBeD9Akr2Ak4BnAocBJ80kEEnSOOadDJLsDvwocBpAVX2nqu4CVgJntNXOAI5r0yuBD1bnUmCPJPsBRwOrq+qOqroTWA0cM9/9kiRtuYVcGRwIbAT+MsnaJB9I8mhgn6q6ra1zO7BPm94fuHXi+etb2ebKJUkjWUgyWAIcCry/qp4B/Cv3VwkBUFUF1AJiPECSE5KsSbJm48aNfW1WkrZ7C0kG64H1VXVZmz+LLjl8rVX/0P5+vS3fABww8fxlrWxz5f9JVZ1aVSuqasXSpUsXsOuSpEnzTgZVdTtwa5IntaIjgc8D5wIzLYJWAee06XOBl7dWRYcDd7fqpAuBo5Ls2W4cH9XKJEkjWbLA5/8G8OEkOwE3Aa+kSzAfTXI8cAvw4rbu+cCxwDrgnrYuVXVHkrcCV7T13lJVdyxwvyRJW2BByaCqrgZWzLHoyDnWLeDVm9nO6cDpC9kXSdL82QNZkmQykCSZDCRJmAwkSSy8NZG0TVt+4nnzet7NJz+v5z2RFpdXBpIkk4EkyWoiaVRWS2lr5ZWBJMlkIEkyGUiSMBlIkjAZSJIwGUiSMBlIkrCfgTTV7Negh8srA0mSyUCSZDKQJGEykCRhMpAkYTKQJGEykCRhMpAkYaczST2yk9u2yysDSZLJQJJkMpAkYTKQJGEykCRhMpAkYTKQJGEykCRhMpAkYQ9kSduwsXs8zyfettK7esFXBkl2SLI2yd+3+QOTXJZkXZK/TbJTK9+5za9ry5dPbOMNrfyLSY5e6D5JkrZMH9VErwNumJh/J/Ceqvo+4E7g+FZ+PHBnK39PW48kBwMvAZ4CHAO8L8kOPeyXJOlhWlAySLIMeB7wgTYf4DnAWW2VM4Dj2vTKNk9bfmRbfyVwZlV9u6q+AqwDDlvIfkmStsxCrwz+DPh94L42/1jgrqra1ObXA/u36f2BWwHa8rvb+t8tn+M5D5DkhCRrkqzZuHHjAnddkjRj3skgyfOBr1fVlT3uz4OqqlOrakVVrVi6dOlYYSVp6i2kNdERwAuSHAs8EtgNOAXYI8mSdva/DNjQ1t8AHACsT7IE2B3454nyGZPPkSSNYN5XBlX1hqpaVlXL6W4Af6qqfh64GHhhW20VcE6bPrfN05Z/qqqqlb+ktTY6EDgIuHy++yVJ2nJD9DN4PXBmkrcBa4HTWvlpwIeSrAPuoEsgVNX1ST4KfB7YBLy6qu4dYL8kSZvRSzKoqkuAS9r0TczRGqiq/h140Wae/3bg7X3siyRpyzkchSTJZCBJMhlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIYZghrSdICLT/xvHk97+aTnzev53llIEkyGUiSTAaSJEwGkiRMBpIkTAaSJEwGkiRMBpIkTAaSJEwGkiRMBpIkTAaSJEwGkiRMBpIkTAaSJEwGkiRMBpIkTAaSJEwGkiRMBpIkFpAMkhyQ5OIkn09yfZLXtfK9kqxOcmP7u2crT5L3JlmX5Jokh05sa1Vb/8YkqxZ+WJKkLbGQK4NNwO9U1cHA4cCrkxwMnAhcVFUHARe1eYDnAge1xwnA+6FLHsBJwDOBw4CTZhKIJGkc804GVXVbVV3Vpr8F3ADsD6wEzmirnQEc16ZXAh+szqXAHkn2A44GVlfVHVV1J7AaOGa++yVJ2nK93DNIshx4BnAZsE9V3dYW3Q7s06b3B26deNr6Vra58rninJBkTZI1Gzdu7GPXJUn0kAyS7Ap8HPjNqvrm5LKqKqAWGmNie6dW1YqqWrF06dK+NitJ270FJYMkO9Ilgg9X1Sda8dda9Q/t79db+QbggImnL2tlmyuXJI1kIa2JApwG3FBV755YdC4w
0yJoFXDORPnLW6uiw4G7W3XShcBRSfZsN46PamWSpJEsWcBzjwB+Ebg2ydWt7I3AycBHkxwP3AK8uC07HzgWWAfcA7wSoKruSPJW4Iq23luq6o4F7JckaQvNOxlU1WeBbGbxkXOsX8CrN7Ot04HT57svkqSFsQeyJMlkIEkyGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIkjAZSJIwGUiSMBlIktiKkkGSY5J8Mcm6JCcu9v5I0vZkq0gGSXYA/ifwXOBg4KVJDl7cvZKk7cdWkQyAw4B1VXVTVX0HOBNYucj7JEnbjVTVYu8DSV4IHFNVv9TmfxF4ZlW9ZtZ6JwAntNknAV+cR7i9gW8sYHe31ljGM57xtp948431+KpaOteCJQvbn3FV1anAqQvZRpI1VbWip13aamIZz3jG237iDRFra6km2gAcMDG/rJVJkkawtSSDK4CDkhyYZCfgJcC5i7xPkrTd2CqqiapqU5LXABcCOwCnV9X1A4VbUDXTVhzLeMYz3vYTr/dYW8UNZEnS4tpaqokkSYvIZCBJMhlIkkwGkiSmPBkkeU2Svdv09yX5TJK7klyW5JCeY+2V5L8l+aV03pTk75P8cZI9+4y1mfhvGDrGHDFPeOi1Bok7SKuNJEcnOT7J8lnlrxog1ookFyf56yQHJFmd5O4kVyR5Rt/xNrMPi/L+jWGR/h8eO2Ksvfve5lQnA+DXqmqmy/YpwHuqag/g9cCf9xzrr4FHAz8EXAzsC7wT+Dfgr3qONZcXjRBjtl8dasMtuc71eCxw7ADx/gh4E3AIcFGS35hY/Jq5n7Ug7wPeBZwH/BPwv6pqd+DEtmwMQ75/uyV5R5IPJXnZrGVjHN+g/w9JTp440VyR5CbgsiS3JPmxnmM9N8lXknw2yTOSXN9irU9yZG+BqmpqH8AXJ6avmLXsmp5jXd3+Btgw17KBj/WqRXh91w647XuBm4CvTDxm5r8zQLxrgSVteg/gfLqTh0GOc3KbwFfHel1HfP8+DpwMHEfXgfTjwM5t2eCf1aFjANdOTF8M/HCb/n5gTc+xrgaeDDwL+Gfg8Fb+5D6Pc6vodDags5L8FfAW4O+S/Cbwd8BzgK/2HOsRrTroMcCuSZZX1c3tTHannmMBkOQrQNEloP3a2UmAqqonDBFzlp8acNs3AUdW1X96n5LcOkC8JVW1CaCq7kryU8CpST7GMO/fvyc5CtgdqCTHVdXZ7azy3gHizWXI9++JVfWzbfrsJG8CPpXkBUMFHPn/YUmSmc/Mo6rqCrpAX0qyc8+x7quqGwCS3FNVl7ZYNyTprXZnqpNBVb0pySuAjwBPBHamG/X0bODnew73DuALbfpVwAeSFN3vM7y551gAVNWBM9NJ1lbVoHXNSX4A2B+4rKr+parWt/JjquqCnsP9GbAncyftd/UcC+DLSX6sqj4NUFX3AscneRvwsw/+1Hn5VbrjuA84Gvi1duKygftH5u1NktBVnRRwFt0J0cokXwD+vKru6znkzkkeMbPdqnp7kg3AZ4Bde45FizHm/8P7gPOTnAxckOQU4BN0r+vVPce6K8mvALsBdyb5LeCjwE8A/9JXEHsg96j9SE+qG15jCfB0uiqj20aIPeiHP8lrgVcDN9Ad1+uq6py27KqqOnSo2GNI8iiAqvq3OZbtX1Xb9MCJrZ7+e+iucr5Jd2J0LvA84GtV9bqe470L+GRV/Z9Z5ccA/72qDuoz3hzxxzg5+nHg1+iqhpYAt9KdaP5lVf1Hj3EOAP4r3YnDm4GXAscDtwC/O3PVsOA4054MkuwGLK2qL88qf2pVXdNzrH0Bqur2JEuB/0J332KocZYmY/+PmvX7Dz1v/1rgWVX1L621zVnAh6rqlKH+8ZIcRneJ
f0W6X747BvhCVZ3fd6zNxP+jqnrjgNt/wJXWRHnvV1pJrq2qQ5LsCNwO7FdV32knLVdV1VP7jLfYhv5/mEZTnQySvJiuuuHrwI7AK2bq9vo+m22XcSfS1VG+E3gFcB3wI8C7quq0vmIthiTXV9VTJuZ3pUsInweeU1VP7zneSXQ/g7oEWA08k+5G3U8CF1bV23uO997ZRcAvAh8EqKrX9hxv1CutyYSd5IKqOmZi2dUDvH9zVkvRVaUOUS01upGT+dF0Q/tfVFU3T5S/qqpO7yVIn3e9t7YHXd3dfm36MLoP4k+3+bU9x7oW2AV4LF093r6tfE8Gak1EV/f6QuC3gNfSnTk/YqBYnwKePqtsCd2X5b0DxLuWbgTbXeiqNXZr5Y+i55Zgbbu30jUPfjmwqj02zkwPdHy7tunlwBq6hND7Z7Nt8x9m4s0q3xe4fIB476NLAue21/VjdMn1TOCUvuO1mIdxf6ueg4HfBo4dKNZr6X5p8WzgZmDlxLJeWzIBf0R3r+XPgC8DvzFErN5fpK3pwUTzrza/H3BleyP7fsOumpj+3Kxlawc4thcDlwMfaB+QDwEfBq4BDhkg3rKZBDfHsiMGiLd2ruk2f/UA8R7T/tn+BvjeVnZT33Em4l0/a35X4ALg3UMc34Psx6OB7xlgu9e2vzvSNYfcqc0vGSiZnwRc2pLqO+hOXv6gfYm+aYjjGyuZM1Kz51E+cIv1oOvM88RZZbsBFwHf7jnWlcCObXrZRPkjZyeHnuJdA+zSpvemqzoBeCrwT4v92vdwfJdNHN8jJsp37zuRz4o702nwd4GbB4wz6pXWRIwd5yjbe4A4ayemL5i17OoB4o19JTlaMgdumDW/A3Aa3dXW9X3FmfoeyHR1v99VVd+kq07pe4iBn6arH6Vak8vmscDv9BwLuuOaafnyr3QtRajupvhuvQdLDklyaZJbk5yaiSE2klzedzzgR6vqHoB6YP3yjnRVN4Ooqivp6rf/DfjsUHHoqqNunxV7U1W9HPjRvoMleXaS9cBtST6ZBw658cm+4wG3t/tK1APvT+wLfGeAeJuq6t72mfly+z+nutZhQ9yf+FqSp8/MVHfP4Pl0J2a9DnVDa/Y8Eeveqjqerprqyb1F6Ttjbu0P4PnTEIvuJvWFdEMo/F/gja18L3o8W5iI91m6JLoH3Vnz9bSrLsbrMXvCtH5Whj4+up+WfUqbfiFwI/f3ZB3l/WuxhqqWGvVKkhGrTemubh61mWX79xZnrA/B1vIY4oOxWLHoxuj5XeAnJ8oeQev233Os2fdBnj3zhTLWazrmezdt8eZ4/55Cd2Z53Ijv3x8OuO05P/O0M/WRjm+0k5UhXstpryaaSx56lW0jVlWdX1V/UlWroRvcraruq6pvDxEvye4TsS+m65n7IeDxQ8SbaxdGijON8f5jph8MQHV9X44E/hAYtAPYhMGGopjrM5/k16vqG1V17VBxZxls4L859P5aTvVwFJvxK9MQK8kRdC2J7qO7//E24AlJdgJeXFX/r+eQ76Srn7x0pqCqrmmjJv5Bz7E2Z8ixdOYy5mcFhj2+E4F9mLhPUVXrW130WJ2zBkt2SX57jlhvSPJIgKp691CxZ8UcS++xprrTGYzbi3Xk3s6X03VJ3xX438BxVfXZJIfSdfc/os94iyHJE4CfAQ6gG7ztS8DfVLs5OEC8MT8rrwX+rqqGGHRvrniPqzkG/RvT5FhFA2z7W3RNLq/n/i/K36RrLkxVDTI+2Kx9WFYPbDwyZKzeX8upTgZj9mIds7dz2+baur9H6Q1V9eSJZUPEO/fBlldVr5et7cvy+XTtxI8F1gJ30bXa+vWquqTneKN9Vlq8u+lagX2ZbiDFj1XVxj5jzIr33c9Eko/X/SOKDibJs+mqEieT+Qeqat0AsR4H/CndaLdvrqp7ktxU44zeO7MPn6qq5wyw3XcDH6+qf+x72w8w1g2PxXgwYttjRuzt3Lb5uYnp42Ytu26AeBuBq4Df
o2v6+GOTj6Heuza9C3BJm37cQK/n2O3U19Ld7D+Krs34Rrp26quAxwwRb67poR50Hb/+EvgFup7Ifwz8cjvuFw0YdyXwj3QtpobsNHjNrMe1wLdn5nuOtZGuU9stdCPdPmOIY5r2ewabqhuK+J4kD2h7nKTvy9Udqo1OWlWXt7Oiv28jDg5x+fUHSXapqnuq6uyZwiRPpI2n07N96c6SXwq8jO4Xuj5Sww7Ct4TujHJn2rDHVfXVNtha38b8rLRN1310bfw/2Y7puXSv758AS/uOt5npoTy/qg4BSHIm8Omq+r0kZ9E1hf7YEEGr6pwkF9H1SB6yyuZmupOGt9H1SQndcQ1x32d9Va1I8v3AzwF/3UZI/gjd/+CXeoky9BnCYj4Yse0xc/d2fgwD9HZe7Afdl/Mr6M5YXjNQjNfRnWX9Bd1V1itb+VLgM9vyZ6Vtd+2DLNtlgHj30n15fQvY1KZn5r85QLzPAXu16ccBl04sG6IfzOP63ubDiPnTdNWYL2jzg1yJzPX5oxtp4B3Aur7iTPs9g51r7iZne9NV6fTW5CzJ04B7qurGWeU70rXu+XBfsdp2R63DbzF3phv//qV047GcC5xeA431n+QpdC2YrquqLzzU+guMNdpnpW33+6uvM7qtUJKfo6vS+BLwJLrfIz+vDe1+SlW97EE3sOXxRr8n0mI9Gngr3Y9n/VBVLRsgxtoa+LcZYMpvIC+21u7/joG2vZFupM2P0J3Vzh5249M9x/sg8IN0LTbOrKrr+tz+1iDtJwSr6r7WRPcH6cYnGuQ9nHZJ9gKeQHf2etfAsb77hTnWl+es+E+j+72PPx9g27vWxBDZQ5nqTmdJnjrWeDpJjkhyQ5LrkzwzyWrgihb7WX3GavYF3kj3hXUKXX3+N6rq030nguYX6DonvQ74pyTfbI9vJem9qeeY713b5nHAbcCGJCvp6n//GLgm3e8h9x1v7LGeRteS6OdmJ4J2tdV7uM1MD2rm/lVVfW4mEfR9fDOJYK57Zb3GGruebcwHI46nQzec9CHAs4BvAD/Syg8F/nHg4xy8Dn+a37uZbdIl2APp6tKf1MofD6zZ1o9vEd6/Z9PdwP0G3U3y5RPLhrgHM/Y9kdGOb6xYi/6hGfLBiOPp8MCme7OHnB1k7JeWBH6GrmXGFXQ9gXsbuGp7ee/meP+um7VsiHiLPtbTwO/fVjEw3jQc31ixpr1pKUl2r6q7oRtPJ8nPAh+nG92zT5NVbm+YtWynnmPNrsN/c01nHf5Y791MvJlena+aKNuBAd6/tu1Rj29kO1VrdlxVZyW5AfhEktczYjXOgMY8vnFiLXaGHTh7v4yWQWeVPw74i55jvYA5mgTStTL4/QGO7T66S+Bvcf8l8WCXxdP83rXt/jDwyDnKlwO/sK0f3yK8f2uYNcQz3bDPVwPfWuz925aOb6xYtiaS2DrG7pkmSX4C2FhVn5tVvjvdfa1eh/cY25jHN1asqU4GY7bFX4x2/9NsEcZCGrWd+rR/XqY9uY55fGPFmvZ7Bs/iQdrib8Oxtgdjv56T2x9jcLNp/7ycTdeSbtROYCM6m/GOb5RY054MxhxPZzHG7plmY7+eY7dTn/bPy9jJdWxjHt8osaa601l1Pxx9QVWtomuytw64JEnvP+YxZqztwSK8nk+b6UQHPHXoTnXbwedlUTqBjWjM4xsl1lTfM4Bxx9MZe+yeaTftr+c0H1+Se+l+ryF0w4DfM7OIbsTW3RZr3/ow5vGNFWuqk8GY4+lsD2P3jGnaX89pPz5te6Y9GdxHl1HhgZdXQ2Tv0WJtD6b99Zz249O2Z6qTgSTp4ZnqG8iSpIfHZCBJMhlIkkwGkiRMBpIk4P8D6LQ/m2TvFPEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "gatrain.group.value_counts().sort_values(ascending=False).plot(kind='bar')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3)看一下手机APP应用情况"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:21:48.026173Z",
     "iopub.status.busy": "2021-07-24T13:21:48.025826Z",
     "iopub.status.idle": "2021-07-24T13:21:48.248857Z",
     "shell.execute_reply": "2021-07-24T13:21:48.247881Z",
     "shell.execute_reply.started": "2021-07-24T13:21:48.026141Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apps: (459943, 2) Labels: (930, 2)\n",
      "Apps nunique: app_id      113211\n",
      "label_id       507\n",
      "dtype: int64 Lables nunique: label_id    930\n",
      "category    835\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "apps = pd.read_csv('/kaggle/input/app_labels.csv.zip')\n",
    "labels = pd.read_csv('/kaggle/input/label_categories.csv.zip')\n",
    "print(\"Apps:\", apps.shape, 'Labels:', labels.shape)\n",
    "print(\"Apps nunique:\",apps.nunique(),'Lables nunique:', labels.nunique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:19:30.192037Z",
     "iopub.status.busy": "2021-07-24T13:19:30.191711Z",
     "iopub.status.idle": "2021-07-24T13:19:30.202720Z",
     "shell.execute_reply": "2021-07-24T13:19:30.201669Z",
     "shell.execute_reply.started": "2021-07-24T13:19:30.192007Z"
    }
   },
   "source": [
    "可以看出app表中有大量重复的。同时app表中的label并未完全覆盖label分类表中的label。  \n",
    "将apps表与label_categories表合并："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:20:33.618448Z",
     "iopub.status.busy": "2021-07-24T13:20:33.618123Z",
     "iopub.status.idle": "2021-07-24T13:20:33.689199Z",
     "shell.execute_reply": "2021-07-24T13:20:33.688225Z",
     "shell.execute_reply.started": "2021-07-24T13:20:33.618420Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape after join:(459943, 3)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>app_id</th>\n",
       "      <th>label_id</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7324884708820027918</td>\n",
       "      <td>251</td>\n",
       "      <td>Finance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-4494216993218550286</td>\n",
       "      <td>251</td>\n",
       "      <td>Finance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6058196446775239644</td>\n",
       "      <td>406</td>\n",
       "      <td>unknown</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                app_id  label_id category\n",
       "0  7324884708820027918       251  Finance\n",
       "1 -4494216993218550286       251  Finance\n",
       "2  6058196446775239644       406  unknown"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "apps_extended = apps.merge(labels, how='left', on='label_id')\n",
    "print(\"Shape after join:\" + str(apps_extended.shape))\n",
    "apps_extended.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 3.1)App分类流行度情况"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:54:40.200425Z",
     "iopub.status.busy": "2021-07-24T13:54:40.200103Z",
     "iopub.status.idle": "2021-07-24T13:54:40.405863Z",
     "shell.execute_reply": "2021-07-24T13:54:40.404943Z",
     "shell.execute_reply.started": "2021-07-24T13:54:40.200396Z"
    }
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAj3klEQVR4nO3deXhV5bn38e9NyEhGSAhhDCCiqIgYJ5yn2ta2ao9V64TWlo6n49tT23O9te2Z7Ok5rbbnOlZbtVgH1LZW27e1VZxnwqCiiIAQQiQTEJIASUhyv3+sFYwYshOSnZWd/ftcV66999rDuheL7F+eZ631PObuiIiI9GZU1AWIiMjwp7AQEZGYFBYiIhKTwkJERGJSWIiISEyjoy5gIAoLC720tDTqMkREEsry5cvr3b2oP+9J6LAoLS2lvLw86jJERBKKmVX09z3qhhIRkZgUFiIiEpPCQkREYlJYiIhITAoLERGJKW5hYWZ3mFmtma3utmysmT1mZuvC24JwuZnZz81svZm9Zmbz41WXiIj0XzxbFr8BPrzfsuuBpe4+C1gaPgb4CDAr/FkE3BLHukREpJ/iFhbu/gywfb/FFwCLw/uLgQu7Lb/LAy8B+WZWEq/aRESkf4b6mEWxu28N71cDxeH9SUBlt9dtCZd9gJktMrNyMyuvq6uLX6UiIrJPZAe4PZh1qd8zL7n7be5e5u5lRUX9ulpdREQO0lCHRU1X91J4WxsurwKmdHvd5HCZiIgMA0MdFo8AC8P7C4GHuy2/Ojwr6kRgZ7fuKhERiVjcBhI0s/uAM4BCM9sC3ADcCDxgZtcBFcAl4cv/AnwUWA/sBq6NV10iItJ/cQsLd//0AZ46u4fXOvDleNUiIiIDoyu4RUQkJoWFiIjEpLAQEZGYFBYiIhKTwkJERGJSWIiISEwKCxERiUlhISIiMSksREQkJoWFiIjEpLAQEZGYFBYiIhKTwkJERGJSWIiISEwKCxERiUlhISIiMSksREQkJoWFiIjEpLAQEZGYFBYiIhKTwkJERGJSWIiISEwKCxERiUlhISIiMSksREQkJoWFiIjEpLAQEZGYFBYiIhKTwkJERGIaHXUBIiIyNNo7Otm+u+2g3quwEBFJUO5OU2s79U2tbNvVxrbmVuqb29jW3Ma2Xa1sa26jvvm95xr27MX94NalsBARGWZ2tbZT39xKXVPrvtu65rb3Pw7vt7Z39vgZeZmpjMtOo3BMOrPGZ3PijLGMG5NOYXYaV/+4/zUpLEREhkDL3o79vvxbqW9qo665Jbx977ndbR0feL8ZjM1KoygnncLsdKYXjgnvp1GYHSwbF94vyEojbfSBD0lffRD1KyxERAagZW8HtY2t1Da1ULPfbdfy2qZWGnbv7fH9+VmpFIVf9kdPzqcwO31fCBTlBPeLstMZOyaN0SnRnZOksBAR6cGeto5eA6CmsZXaxhYaW9o/8N7UFKMoO52i3AxKx43h+OljKc7JoCgnnfG56fsCYdyY9F5bAMOJwkJEksqu1nZqm4Iv+prwdt/jbi2Bph5CIC1l1L4v/EOKslkwcxzjc9IZn5vB+Jx0isPbgqw0Ro2yCLYufiIJCzP7BvBZwIHXgWuBEmAJMA5YDlzl7gd3jpeIJJ2OTqe+uZXqnS1s3dlCTeN7t9XhbW1TK82tPYTA6FEU56YzPieDQ4tzOHVWEUXdvvy7bvOzUjEbWSHQV0MeFmY2CfgqMMfd95jZA8BlwEeBn7n7EjP7JXAdcMtQ1yciw0/L3o59X/rV4e3+gVDb1EpH5/vPC01NMcbnZDAhL4PDS3I5fXYQCPsCIDed4pwMcjNHJ20I9FVU3VCjgUwz2wtkAVuBs4DLw+cXAz9AYSEyork7jS3t3UJgD9U7W6lu3PO+QNjRw8Hh7PTRTMjLYEJuBjNnFlKSl0FxXgYluUE4TMjLYOwI7A6KypCHhbtXmdl/AZuBPcDfCbqdGty9q324BZjU
0/vNbBGwCGDq1KnxL1hEDoq7U9/cxrsNe/a1Bvbddru/Z+8HTxMtzE5jQl4GkwsyKSstYEJuBsW5GZTkZTIhL2gV5GSkRrBVySuKbqgC4AJgOtAAPAh8uK/vd/fbgNsAysrKDvJaRBEZqJa9HVTvbKGqYQ9VDXt4t2EPVTv28O7OPbzbECxv2++Csa5uoZK8DOZMzOXsw8bvawV0BUJxbkbCnCGUTKLohjoH2OjudQBm9gfgZCDfzEaHrYvJQFUEtYkIQaugYffeHoOgqqGFqh17qG9ufd97zGB8TjoT8zM5YmIuH5pTzMT8TCbmZ1KibqGEF0VYbAZONLMsgm6os4Fy4EngYoIzohYCD0dQm0hS2NvRua9V8MEg2M27DR/sHspIHcXE/Ewm5Wdy+OHj993vup2QpxbBSBbFMYuXzex3wAqgHVhJ0K30/4AlZvav4bLbh7o2kZGirb2Tim272Lx9d7fWwXtBUNPU8oEB5Qqz05iUn8mhxTmcMXv8+4JgUkEmBUl82qhEdDaUu98A3LDf4neA4yMoRyRhNbbsZUNtM+trm9lQt4v1tc28U9dMxfbd7zuNNC1lFBPzM5iYn8kpswqDAOgKg4KgmygjNSXCLZHhTldwiySAlr0dvFXdxOtbGni7ppkNdUFA1Da9d9wgNcWYXjiG2RNyOH9uCYeMz2bq2CwmFWRSOCZdxwpkQBQWIsNMW3sna6ubeK2qgdVVO3lty07WVjfRHrYUctJHM3N8NqcdWsTMomwOGZ/NzKIxTB2bFelAczKyKSxEItTR6bxd08RrWxp4bctOXq/ayVtbm2jrCE45zctMZe7kPBadNoO5k/M4clIek/IzdexAhpzCQmQI7dy9lxWVO1hZsYPlm3ewanMDu8K5C3LSR3PkpDyuPbmUoybnMXdSPlPGKhhkeFBYiMRJZ6fzTv0uVlTsYMXmHSyv2MG62mYARhkcNiGXT86fzPxp+Rw9OZ/ScWN0XEGGLYWFyCDZ09bBqsoGVmzeQfmm7aysbNg34U1eZirzp+ZzwbyJzJ9awNFT8hmTrl8/SRz63ypykGoaWyjfFLQYllds5413G/cdhD5kfDYfPmIC86cWMH9aATMK1WqQxKawEOmDjk5nbXUTyyu2s7xiB+UVO9iyYw8QXNl89OR8Pn/6DI6dVsD8qQXkZ6VFXLHI4FJYiPSgubWdVZsbKA/DYdXmBprCSXPG56RTVlrAtSdP59hpBRwxMZdUnbIqI5zCQgSoathD+abtYZfSDtZsbaTTg8HxZhfncMExEymbNpZjpxUwuUBnKEnyUVhI0nF3NtTt4sUN9by0cTsrKnawdWcLAFlpKRwzNZ+vnDWLY6cVcMzUfHI1b4KIwkKSQ1XDHp5fX8+LG7bxwoZ6ahqDYTJK8jIoKx1L2bQCjp1WwGETcnQVtEgPFBYyItU3t4bBEIRDxbbdAIwbk8ZJM8dx8iGFLJg5jqljs9SlJNIHCgsZETo6neUVO3jszWqeXVfPW9VNQHBV9AkzxrLwpFIWHDKO2cU5CgeRg6CwkIS1p62D59bX8/c3qnnirVq27WojNcU4fvpYvn3ebBbMHMdRk/LUrSQyCBQWklC272pj6ZoaHnuzhmfW1dGyt5OcjNGcOXs8584p5ozZReTogLTIoFNYyLC3edtu/v5mNX9/s4byTdvp9ODA9CVlUzh3TjEnTB+n6TxF4kxhIcPS+tom/vp6NX9dXc2bWxsBOGxCDl858xDOnTOBIyfl6tiDyBBSWMiw4O68ubWRR1cHAbE+HJ312GkF/PNHD+e8IyYwdVxWxFWKJC+FhUSms9N5dUvDvoDYvH03owxOmD6Oq0+axnlHTKA4NyPqMkUEhYVEYM3WRh4or+TR1dVs3dlCaoqxYGYhXzpjJufOKWZcdnrUJYrIfhQWMiSaW9v506vvsuSVzby6ZSdpo0dx+qFFfPu82Zx9eDF5mTqDSWQ4U1hI3Lg7KysbWPLKZv782lZ2t3UwuziH
Gz4+h4uOmaRhvEUSiMJCBt2OXW08tLKK+5dVsramiay0FD4+dyKXHT+FeVPydRaTSAJSWMig6Ox0XnpnG/ctq+Rvq6tp6+jk6Cn53PjJo/jY0RPJ1hSiIgkt5m+wmZ0MrHL3XWZ2JTAfuNndK+JenQx7tY0tPLh8Cw+UV1KxbTe5GaO5/ISpXHrcFA4vyY26PBEZJH35c+8W4GgzOxr4FvBr4C7g9HgWJsNXe0cnT79dx5JllTzxVi0dnc6JM8byjXMO5cNHTiAjNSXqEkVkkPUlLNrd3c3sAuB/3P12M7su3oXJ8FO5fTcPlFfyYPkWqhtbKMxO53OnzuDS46YwvXBM1OWJSBz1JSyazOy7wFXAqWY2CtB5jkmitb2Dx96s4f5llTy3vh6A0w8t4gefOIKzDx+vuadFkkRfwuJS4HLgM+5ebWZTgZ/EtyyJ2tade/jtixUsWVbJ9l1tTMrP5OtnH8qnyiYzMT8z6vJEZIjFDIswIO4FjjezjwPL3P2u+JcmUVhV2cAdz23kL69vpdOdcw4v5ooTp3HKIYWkjNIpryLJqi9nQ30W+D7wBGDAL8zsR+5+R7yLk6HR3tHJo29Uc8dzG1mxuYGc9NFcs6CUhQtKmTJWg/eJSN+6ob4NHOPu2wDMbBzwAqCwSHANu9tYsqySu17YxLs7W5g2LosffHwOF5dN0XURIvI+fflG2AY0dXvcFC6TBLW+tpnfvLCR3y+vYs/eDhbMHMePLjiSMw8br64mEelRX8JiPfCymT0MOHAB8JqZfRPA3X8ax/pkEK2tbuKnj63lb2/UkDZ6FBfOm8i1J0/XxXMiElNfwmJD+NPl4fA2Z/DLkXjYVL+Lmx5/m4dffZcxaaP56tmzuPqkaRRqKHAR6aO+nA31QwAzyw0eelOMt8RkZvkEV4IfSdBa+QywFrgfKAU2AZe4+46BriuZvduwh188sY4HyreQmmIsOm0GXzhtJgVjNNqriPRPX86GKgPuJGxJmNlOgmsulg9gvTcDj7r7xWaWBmQB3wOWuvuNZnY9cD3wnQGsI2nVN7fyv09u4O6XK3B3rjxhKl8+8xDGa9Y5ETlIfemGugP4krs/C2BmpxCEx9yDWaGZ5QGnAdcAuHsb0BYOJ3JG+LLFwFMoLPpl5+693PbsBu58fhMtezv4h/mT+erZs3T6q4gMWF/CoqMrKADc/Tkzax/AOqcDdcCd4eCEy4GvAcXuvjV8TTVQ3NObzWwRsAhg6tSpAyhj5NjV2s5vXtjErU9voLGlnY/NLeEb5x7KzKLsqEsTkRGiL2HxtJndCtxHcHzhUuApM5sP4O4rDmKd84F/dPeXzexmgi6nfcKBC72nN7v7bcBtAGVlZT2+Jpm8U9fMdYvL2Vi/i3MOH883z53NnIk6u0lEBldfwuLo8PaG/ZYfQxAeZ/VznVuALe7+cvj4dwRhUWNmJe6+1cxKgNp+fm7SeX59PV+8ezmjU0Zx7+dOYMHMwqhLEpERqi9nQ505mCsMx5qqNLPZ7r4WOBt4M/xZCNwY3j7cy8ckvXteruD7D7/BzKIx3L7wOB2XEJG46tOYDmZ2PnAEsO90Gnf/0QDW+4/APeGZUO8A1wKjgAfCuTIqgEsG8PkjVntHJ//2lzXc+fwmzpxdxM8/fQw5GRoxXkTiqy+nzv6S4NTWMwmujbgYeGUgK3X3VUBZD0+dPZDPHekaW/byj/eu5Om36/jMydP55/MP1/AcIjIk+tKyWODuc83sNXf/oZn9N/DXeBcm77d5226uW7yMjfW7+PeLjuLyE3QmmIgMnb6ExZ7wdreZTSQYRLAkfiXJ/l7ZuJ0v3L2cjk7nruuO14FsERlyfQmLP4fDc/wEWEFwBtSv4lmUvOfB8kq+99DrTCnI4vZrjtNc1yISib6cDfUv4d3fm9mfgQx33xnfsqSz0/nx397i1qff4eRDxvG/lx9LXpYOZItINPo1w427twKtcapFuvnWg6/y0MoqrjhhKj/4xBGk
poyKuiQRSWKaDm0YKt+0nYdWVvGlM2by7fNmY6YznkQkWgf8c9XMTg5vNenBELt56TrGjUnjK2cdoqAQkWGht76Nn4e3Lw5FIRJYXrGdZ9fV8/nTZ5CVpoafiAwPvX0b7TWz24BJZvbz/Z9096/Gr6zkddPjQaviyhOnRV2KiMg+vYXFx4BzgPMIhhGXOFtesYNn19Xz3Y8cplaFiAwrB/xGcvd6YImZrXH3V4ewpqR189J1jB2TxlUnqVUhIsNLX87H3GZmD5lZbfjzezObHPfKksyKzTt45u06Fp2mYxUiMvz0JSzuBB4BJoY/fwqXySC6+fGwVaFjFSIyDPUlLMa7+53u3h7+/AYoinNdSWXl5h08/XYdnzt1BmPS1aoQkeGnL2FRb2ZXmllK+HMlwWCCMkhuXrqOgqxUrtaxChEZpvoSFp8hmIioGthKMJ/FtfEsKpmsqmzgqbV1fO40tSpEZPjqy0CCFcAnhqCWpHTz42+Tn5XK1SeVRl2KiMgBaXS6CL1a2cCTa4NjFdlqVYjIMKawiNDNS9eRn5XKwgWlUZciItIrhUVEXtvSwBNv1fLZU6arVSEiw16fw8LMTjSzR83sKTO7MI41JYWbH19HXqZaFSKSGA74J62ZTXD36m6LvglcBBjwMvDH+JY2cr2+ZSdL36rlW+ceSk6GZr8TkeGvt/6PX5rZCuA/3b0FaCA4bbYTaByC2kasm5e+HbQqTi6NuhQRkT45YDeUu18IrAT+bGZXA18H0oFxwIVDUNuItLpqJ4+vqeW6U6aTq1aFiCSIXo9ZuPufCIYozwMeAt5295+7e91QFDcS3fT4OnIzRnONWhUikkB6m1b1E2b2JPAosBq4FLjAzJaY2cyhKnAkCVoVNVx3ygy1KkQkofR2zOJfgeOBTOBv7n488C0zmwX8G3DZENQ3oty8VK0KEUlMvYXFTuCTQBZQ27XQ3dehoOi3t6obeezNGr5+zizyMtWqEJHE0tsxi4sIDmaPBi4fmnJGrluffoestBSu0XUVIpKAYk2r+oshrGXEqty+m0defZdrFpSSn5UWdTkiIv2m4T6GwO3PbWSUwWdPnR51KSIiB0VhEWfbmltZsmwzF8ybREleZtTliIgcFIVFnC1+sYKWvZ184fQZUZciInLQFBZxtKu1ncUvbOJDc4o5ZHxO1OWIiBw0hUUc3ffKZnbu2csXztA1jCKS2BQWcdLW3sntz23khOljmT+1IOpyREQGJLKwMLMUM1tpZn8OH083s5fNbL2Z3W9mCX2O6cOrqti6s4UvqlUhIiNAlC2LrwFruj3+MfAzdz8E2AFcF0lVg6Cz07n1mXc4vCSX0w8tirocEZEBiyQszGwycD7w6/CxAWcBvwtfspgEHgb98TU1rK9t5gunzyDYNBGRxBZVy+Im4J8IJlKCYFiRBndvDx9vASZFUNeAuTu3PL2BKWMzOf+okqjLEREZFEMeFmb2MaDW3Zcf5PsXmVm5mZXX1Q2/aTVe2bidlZsbWHTqDEan6PwBERkZovg2Oxn4hJltApYQdD/dDOSbWddYVZOBqp7e7O63uXuZu5cVFQ2/4wG3PL2BcWPS+FTZlKhLEREZNEMeFu7+XXef7O6lBEOdP+HuVwBPEszxDbAQeHioaxuoNVsbeWptHdeeXEpGakrU5YiIDJrh1E/yHeCbZrae4BjG7RHX02+/fHoDY9JSuOrE0qhLEREZVL1NfhR37v4U8FR4/x2CmfkSUuX23fzp1Xe57pTp5GVpciMRGVmGU8siof3q2XdIGWVcd4oGDBSRkUdhMQjqm1u5f1klnzxmMhPyMqIuR0Rk0CksBsHiFzbR1tHJIg1DLiIjlMJigJrDYcjPmzOBmUXZUZcjIhIXCosBWvLKZhpb2jUMuYiMaAqLAWhr7+TXz27kpBnjmDclP+pyRETiRmExAH9cVUV1Y4taFSIy4iksDlJnp3NbOAz5abMKoy5HRCSuFBYH6Ym3ajUMuYgkDYXF
Qbr1mQ1Mys/koxqGXESSgMLiICyv2M6yTTv47KnTSdUw5CKSBPRNdxBuffod8rNSufQ4DUMuIslBYdFP62ubeWxNDVefOI2stEjHYRQRGTIKi3761TPvkJYyiqsXlEZdiojIkFFY9ENNYwsPrazikrIpFGanR12OiMiQUVj0w53Pb6K9s5PPnjo96lJERIaUwqKPmlr2cs9LFXzkqBKmjRsTdTkiIkNKYdFH972ymabWdj5/moYhF5Hko7Dog7b2Tm5/biMLZo5j7uT8qMsRERlyCos+eHhVFTWNrXz+dA0YKCLJSWERQ9eAgYdNyNGAgSKStBQWMTzxVi3rapv5wukzNWCgiCQthUUMXQMGnj9XAwaKSPJSWPSia8DA607RgIEiktz0DdiLrgEDLzteAwaKSHJTWByABgwUEXmPwuIAfv2sBgwUEemisOhBbWMLf1hRxafKJmvAQBERFBY9uvOFcMDAUzS0h4gIKCw+oKllL3e/VMFHjiyhtFADBoqIgMLiAx4o30JTSzuLNGCgiMg+CotuOjud3764iWOnFXD0lPyoyxERGTYUFt08s66OTdt2c/VJ06IuRURkWFFYdHPXixUUZqfzkSM1tIeISHcKi1DFtl08ubaWy0+YStpo/bOIiHSnb8XQ3S9VkGLGFSdMjboUEZFhR2EB7Gnr4P5llZx35ASKczOiLkdEZNgZ8rAwsylm9qSZvWlmb5jZ18LlY83sMTNbF94WDFVND6+qorGlnYUnlQ7VKkVEEkoULYt24FvuPgc4Efiymc0BrgeWuvssYGn4OO7cncUvVnDYhByOKx2yfBIRSShDHhbuvtXdV4T3m4A1wCTgAmBx+LLFwIVDUU95xQ7WbG1k4YJSzYQnInIAkR6zMLNS4BjgZaDY3beGT1UDxQd4zyIzKzez8rq6ugHXsPiFTeRmjOaCeRMH/FkiIiNVZGFhZtnA74Gvu3tj9+fc3QHv6X3ufpu7l7l7WVFR0YBqqGls4dHV1VxSNkVzVoiI9CKSsDCzVIKguMfd/xAurjGzkvD5EqA23nXc+/JmOty58kRdsS0i0psozoYy4HZgjbv/tNtTjwALw/sLgYfjWUdbeyf3vrKZMw4t0uiyIiIxRNH3cjJwFfC6ma0Kl30PuBF4wMyuAyqAS+JZxKNvVFPX1KqZ8ERE+mDIw8LdnwMOdNrR2UNVx10vbGLauCxOnzWw4x4iIskgKa/gXl21k/KKHVx14jRGjdLpsiIisSRlWPz2xQoyU1P41LFToi5FRCQhJF1YNOxu44+rqrjwmEnkZaVGXY6ISEJIurB4oLyS1vZOTXAkItIPSRUWHZ3Ob1+q4PjpYzm8JDfqckREEkZShcVTa2up3L5Ho8uKiPRTUoXFXS9WUJybzoeO6HHYKREROYCkCYuN9bt4+u06rjhhGqkpSbPZIiKDImm+NX/7YgWpKcZlx+t0WRGR/kqKsNjV2s6Dyyv56FEljM/RtKkiIv2VFGHxx1VVNLW0c7UObIuIHJSkCIv8zDQ+NreE+VPzoy5FRCQhJcWMP+fPLeH8uSVRlyEikrCSomUhIiIDo7AQEZGYFBYiIhKTwkJERGJSWIiISEwKCxERiUlhISIiMSksREQkJnP3qGs4aGbWBKyNuo44KgTqoy4ijkby9o3kbQNtX6Kb7e45/XlDol/Bvdbdy6IuIl7MrFzbl5hG8raBti/RmVl5f9+jbigREYlJYSEiIjEleljcFnUBcabtS1wjedtA25fo+r19CX2AW0REhkaityxERGQIKCxERCSmhA0LM/uwma01s/Vmdn3U9QwmM9tkZq+b2aqDOcVtuDGzO8ys1sxWd1s21sweM7N14W1BlDUOxAG27wdmVhXuw1Vm9tEoaxwIM5tiZk+a2Ztm9oaZfS1cnvD7sJdtGxH7z8wyzOwVM3s13L4fhsunm9nL4ffn/WaWFvOzEvGYhZmlAG8D5wJbgGXAp939zUgLGyRmtgkoc/cRcVGQmZ0G
NAN3ufuR4bL/BLa7+41h2Be4+3eirPNgHWD7fgA0u/t/RVnbYDCzEqDE3VeYWQ6wHLgQuIYE34e9bNsljID9Z2YGjHH3ZjNLBZ4DvgZ8E/iDuy8xs18Cr7r7Lb19VqK2LI4H1rv7O+7eBiwBLoi4JjkAd38G2L7f4guAxeH9xQS/oAnpANs3Yrj7VndfEd5vAtYAkxgB+7CXbRsRPNAcPkwNfxw4C/hduLxP+y5Rw2ISUNnt8RZG0A4m2Jl/N7PlZrYo6mLipNjdt4b3q4HiKIuJk6+Y2WthN1XCddH0xMxKgWOAlxlh+3C/bYMRsv/MLMXMVgG1wGPABqDB3dvDl/Tp+zNRw2KkO8Xd5wMfAb4cdnOMWB70hSZef2jvbgFmAvOArcB/R1rNIDCzbOD3wNfdvbH7c4m+D3vYthGz/9y9w93nAZMJemUOO5jPSdSwqAKmdHs8OVw2Irh7VXhbCzxEsINHmpqwv7ir37g24noGlbvXhL+kncCvSPB9GPZ3/x64x93/EC4eEfuwp20bafsPwN0bgCeBk4B8M+saG7BP35+JGhbLgFnhEf004DLgkYhrGhRmNiY80IaZjQE+BKzu/V0J6RFgYXh/IfBwhLUMuq4v0dBFJPA+DA+S3g6scfefdnsq4ffhgbZtpOw/Mysys/zwfibBSUFrCELj4vBlfdp3CXk2FEB4KttNQApwh7v/W7QVDQ4zm0HQmoBgVOB7E33bzOw+4AyCYZ9rgBuAPwIPAFOBCuASd0/Ig8QH2L4zCLowHNgEfL5b/35CMbNTgGeB14HOcPH3CPr2E3of9rJtn2YE7D8zm0twADuFoHHwgLv/KPyeWQKMBVYCV7p7a6+flahhISIiQydRu6FERGQIKSxERCQmhYWIiMSksBARkZgUFiIiEpPCQoYlM5tgZkvMbEM47MlfzOzQXl6fb2ZfGsoae7PfqKWrzewTg/z5Z5jZn/v5nolm9rvw/rxEHUlVoqGwkGEnvFDqIeApd5/p7scC36X3sYfygbiHRberXvviZ+EwC58C7jCzyH7fzGy0u7/r7l0XYs0DFBbSZwoLGY7OBPa6+y+7Frj7q+7+rJllm9lSM1thwZwfXaMN3wjMDP+S/wmAmX3bzJaFg8H9sOuzzOz/WjAXynNmdp+Z/Z9w+Twzeyl8/UNdg8eZ2VNmdpMFc4v8s5ltDIeIwMxyuz/uibuvAdqBQjP7dFj3ajP7cbeams3sZ+GcA0vNrKjbusvC+4Xh8PXvY2bHm9mLZrbSzF4ws9nh8mvM7BEzewJYamal4XrTgB8Bl4b/XpdaMCdF1zpHWTDPQVH/dpuMZAoLGY6OJJhXoCctwEXhQItnAv8dtkSuBza4+zx3/7aZfQiYRTCmzzzgWDM7zcyOA/4BOJpgoMaybp99F/Add59LcEXvDd2eS3P3Mnf/IfAUcH64/DKCeQH2HmhjzOwEgquDU4EfEwwPPQ84zswuDF82Bih39yOAp/dbdyxvAae6+zHA94F/7/bcfOBidz+9a0E4rP/3gfvDf6/7gbuBK8KXnEMwv0FdP2qQEa4/TWqR4cCAfw9H4u0kGFq5p+6pD4U/K8PH2QThkQM87O4tQIuZ/QnAzPKAfHd/Onz9YuDBbp93f7f7vwb+iWDIkmuBzx2g1m+Y2ZVAE3ApQTA91fUlbGb3AKeFn9PZbR13A3/4wKcdWB6w2MxmEQxP0b2V81gfh+C4g2B8oJuAzwB39mP9kgTUspDh6A3g2AM8dwVQBBwbHg+oATJ6eJ0B/xH+5TzP3Q9x99sHUNOurjvu/jxQamZnACnufqBB5n4WrvtUd3+2n+vrGoennfd+T3vaToB/AZ4MZ+n7+H6v29XzW/ZbmXslwSiyZxG0xv7az3plhFNYyHD0BJBu3SZ+MrO5ZnYqwV/Rte6+18zOBKaFL2kiaDV0+RvwGQvmKcDMJpnZeOB54OMWzE2cDXwMwN13AjvCdQBcRdAddCB3AffSv7/AXwFOD489
pBAMVte1jlG8Nwro5QTTX0IwiF1XcHY9v7883hti+po+1rL/vxcELaa7gQfdvaOPnyNJQmEhw044kc5FwDnhqbNvAP9BMBvbPUCZmb0OXE3QX4+7bwOeDw/g/sTd/07wZf5i+NrfATnuvoxgaO3XCP56fh3YGa56IfATM3uN4JjCj3op8x6gALivH9u1leDYypPAq8Byd+8aGnoXcLyZrSY4ptG17v8CvmhmKwlGte3JfwL/Eb6mr13LTwJzug5wh8seIeiuUxeUfIBGnZWkY2bZ4QT2WcAzwKKueZj78RkXAxe4+1WDVFOzu2cPxmcNoIYygq6zU2O+WJKODnBLMrrNzOYQ9O0vPoig+AXBmVQj5joFM7se+CLvnREl8j5qWYiISEw6ZiEiIjEpLEREJCaFhYiIxKSwEBGRmBQWIiIS0/8HlARFkQzHgxUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total categories: 473\n"
     ]
    }
   ],
   "source": [
    "# Cumulative share of rows covered by the N most frequent categories.\n",
    "ac = apps_extended.category.value_counts()\n",
    "acs = ac.cumsum() / ac.sum()\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(np.arange(1, len(acs) + 1), acs.to_numpy() * 100)\n",
    "ax.set(xlim=(0, 30), xlabel='Category Popularity', ylabel='% of apps')\n",
    "plt.show()\n",
    "\n",
    "print(\"Total categories:\", apps_extended.category.nunique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4)查看一下事件\n",
    "\n",
    "* app_events表中is_installed都为1\n",
    "* 60% app处于inactive状态，40%处于active状态，我们肯定更关心active状态\n",
    "* 32% 的设备产生了事件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:40:05.502217Z",
     "iopub.status.busy": "2021-07-24T13:40:05.501866Z",
     "iopub.status.idle": "2021-07-24T13:40:29.607531Z",
     "shell.execute_reply": "2021-07-24T13:40:29.606529Z",
     "shell.execute_reply.started": "2021-07-24T13:40:05.502183Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "events = pd.read_csv('/kaggle/input/events.csv.zip')\n",
    "app_events = pd.read_csv('/kaggle/input/app_events.csv.zip')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:40:58.492586Z",
     "iopub.status.busy": "2021-07-24T13:40:58.492109Z",
     "iopub.status.idle": "2021-07-24T13:40:58.498174Z",
     "shell.execute_reply": "2021-07-24T13:40:58.497151Z",
     "shell.execute_reply.started": "2021-07-24T13:40:58.492554Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "events: (3252950, 5) app events: (32473067, 4)\n"
     ]
    }
   ],
   "source": [
    "print ('events:',events.shape, \"app events:\", app_events.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**所有app都是已安装状态**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:41:53.363978Z",
     "iopub.status.busy": "2021-07-24T13:41:53.363586Z",
     "iopub.status.idle": "2021-07-24T13:41:53.617859Z",
     "shell.execute_reply": "2021-07-24T13:41:53.616849Z",
     "shell.execute_reply.started": "2021-07-24T13:41:53.363926Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All app events has is_installed = 1\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1    32473067\n",
       "Name: is_installed, dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"All app events has is_installed = 1\")\n",
    "app_events.is_installed.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**60% app处于inactive状态**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:43:09.482143Z",
     "iopub.status.busy": "2021-07-24T13:43:09.481790Z",
     "iopub.status.idle": "2021-07-24T13:43:09.733692Z",
     "shell.execute_reply": "2021-07-24T13:43:09.732719Z",
     "shell.execute_reply.started": "2021-07-24T13:43:09.482112Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Active apps\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    0.607891\n",
       "1    0.392109\n",
       "Name: is_active, dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print (\"Active apps\")\n",
    "app_events.is_active.value_counts()*1.0/len(app_events)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:44:26.200033Z",
     "iopub.status.busy": "2021-07-24T13:44:26.199644Z",
     "iopub.status.idle": "2021-07-24T13:44:29.919064Z",
     "shell.execute_reply": "2021-07-24T13:44:29.917969Z",
     "shell.execute_reply.started": "2021-07-24T13:44:26.200002Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unique device IDs with events: 60669 percent with events: 0.3249266265344159\n"
     ]
    }
   ],
   "source": [
    "# Keep only app events flagged active, then join them with `events` on event_id.\n",
    "active_events = app_events[app_events.is_active == 1]\n",
    "active_apps = active_events.merge(events, how='inner', on='event_id')\n",
    "\n",
    "device_with_event_count = active_apps.device_id.nunique()\n",
    "# Report a true percentage (0-100) so the printed value matches its label.\n",
    "percent_with_events = round(device_with_event_count * 100.0 / phone.device_id.nunique(), 1)\n",
    "print(\"Unique device IDs with events:\", device_with_event_count, 'percent with events:', percent_with_events)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5)再探索一下app应用情况\n",
    "\n",
    "* Total apps: 19237\n",
    "* Total active apps: 10582\n",
    "* Top 3000 apps are on 99+% of devices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:46:56.734731Z",
     "iopub.status.busy": "2021-07-24T13:46:56.734392Z",
     "iopub.status.idle": "2021-07-24T13:47:00.741869Z",
     "shell.execute_reply": "2021-07-24T13:47:00.740879Z",
     "shell.execute_reply.started": "2021-07-24T13:46:56.734700Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "ga = active_apps.groupby('app_id')\n",
    "apps_popularity = ga.device_id.nunique().sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:47:02.278180Z",
     "iopub.status.busy": "2021-07-24T13:47:02.277826Z",
     "iopub.status.idle": "2021-07-24T13:47:06.215263Z",
     "shell.execute_reply": "2021-07-24T13:47:06.214380Z",
     "shell.execute_reply.started": "2021-07-24T13:47:02.278136Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "ga = active_apps.groupby('app_id')\n",
    "apps_popularity = ga.device_id.nunique().sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:47:07.933728Z",
     "iopub.status.busy": "2021-07-24T13:47:07.933401Z",
     "iopub.status.idle": "2021-07-24T13:47:08.896240Z",
     "shell.execute_reply": "2021-07-24T13:47:08.895155Z",
     "shell.execute_reply.started": "2021-07-24T13:47:07.933696Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t\t Devices per app\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEGCAYAAACkQqisAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAm6klEQVR4nO3deZxcVZn/8c/T+57uTjr7CkQggIQQAggigoIiCjIqgVEZZCYzjgrO6CgzDiPzU8ddB2YYMSMgOigIooCgiBhAFIEkLNnIQsjWWbrTSaf3pbqe3x/3NjQhqa6u7upbVf19v171qlunbtd9Tqpznz7n3HuOuTsiIiKpyIs6ABERyV5KIiIikjIlERERSZmSiIiIpExJREREUlYQdQDDMWHCBJ89e3bUYYiIZJUVK1bsdfe6kfisrE4is2fPZvny5VGHISKSVcxs60h9lrqzREQkZUoiIiKSMiURERFJmZKIiIikTElERERSlrYkYma3mlmDma0eUFZrZo+Y2cbwuSYsNzO70cw2mdmLZrYgXXGJiMjISWdL5IfAuw4quxZ41N3nAo+GrwHeDcwNH0uA76UxLhERGSFpSyLu/gSw76Dii4Dbw+3bgYsHlP/IA38Gqs1sSrpiExGRkTHaYyKT3H1XuL0bmBRuTwO2D9hvR1j2Bma2xMyWm9nyxsbG9EUqIiKDimxg3YPVsIa8Ipa7L3X3he6+sK5uRO7aFxGRFI12EtnT300VPjeE5fXAjAH7TQ/LREQkg4323Fn3A1cAXwuf7xtQ/kkzuxM4FTgwoNtLRGTMi8ed3nicWJ/T2xenN3yO9Tk9fXFi8Ti9sXC7//14nN5YnFj89T8zktKWRMzsp8DZwAQz2wF8kSB5/MzMrgK2Ah8Kd38IuADYBHQAV6YrLhGRRNyd7lic7t443X199MTiwaMv/tr2wNd9g5Qd9Lq7LzixH/Lnwu2BJ/z+RBGLD7n3f1SkLYm4+2WHeevcQ+zrwCfSFYuIZK943Ons7QsePX109ATbXb19dMfidPVv98bpioXPvX10xfroCrdf2y9Od+zgnx3wGbE43bGR+0u9IM8oKsgLHvkHPQ/YrigpoCg/j8KCPIrDsoJ8ozA/L3wYBXlBef92YUEehXnBPgX5RlF+HgXhvgO3+z/jtX2MqV8fsSpm91TwIpIZYn1x2nv6aO+OBSf5nj46emJ09PbRFZ74O3r76Ox57f3O3r7X79vzxkTR0ROjqze1k3p+nlFSkEdJYT4lhfkUF+RRXJhPSWEeJQX5VJQXUFwQvj7EPsUF+RQNOKn3n/QLB5z8iw9OEAXBCbs4LMvLsxH+l848SiIiY1RXbx+tXTHau2O0dQfP7T0x2rr7aDtMeXt3jLausLzntX2GeqIvKsijrCifssJ8SovyKSsqoLQon9ryIspqghN6WX/5q9v5lBYVUFb0WiJ4NSmECaA/GZQUBH+JS/opiYhkIXenvaePls5eWrtitHT1JtgOn7titHb2vrrdk2S3TWlhPuXFBVQU51NRUkB5UQFTxpVQXlzwannwHLwuK8oPT/wFYYLoTwCvJYX8MfAX+lihJCISIXenpSvGgY5e9nf00NzZS3NHD/vb+7fD1x29r753oDNIEoONsxYX5FFVWkhlSQFVJYWMKy1kek0pVSWFVJUGZRUDTv7Bcz6VJQWvJojyogKd8CUhJRGRERTri7Ovo4emtvDR3s3eth6a2rpffb2/P2F09HKgs5e+BNmgsqSAmrIiqssKqS4rYlZtGdVlha9LBJXhdmVJIVUlBa8mjuKC/FGsuYxVSiIig4j1xdnb1sOeli72tHTR2J8Q2rrZ2z4wQfSwv6MHP0ROKMgzxlcUUVteTG15IcdOrgoTQ2GYJIqoLi2kpryQcaVF1JQFLQf160umUxKRMcvdOdDZS31zJw0t3WGS6GZPaxcN/dstXext6z5k19G40kLGVxQxobyYoyZWcGpFEePLi5lQ
UcT4imLGlwfPdRXFVJUWYKZuIck9SiKSs+Jxp7Gtmx37O6lv7qR+fyf1zR3hc/C6vafvDT83vryIiVUlTKoqZt6UKiZVFTNpXAmTKkuYVFVCXWUxteVFFBWolSCiJCJZraMnxtamDrbsbWdLUwdbm9rZtq+D+uZOdjV30XPQFA/jSguZVl3KrPHlvOXICUyvKWVqdSmTx4UJoqJYyUFkCJREJON19fbxcmMbW/Z2sKWpna1N7a9uN7R2v27fCRVFzKgt44Rp43jX8ZOZXl3KtJpSplWXMa2mlIpi/cqLjCT9j5KM0dYdY1NDG5sa2tjY0MqmPW1sbGhj+/6O1w1W11UWM3t8GWe9qY45E8qZNb6M2eOD58qSwugqIDIGKYnIqHN3duzvZO2uFtbubGHNzhbW7Wqhvrnz1X2K8vOYM6GcE6aP45IF0zhqYgVzJpQze3w55WpNiGQM/W+UtHJ3tjR18Ny2/azZ2cKanQdYu7OFlq4YAHkGR9RVcPKsGi4/dSZHTaxg7sQKZtaW6fJWkSygJCIjqrWrlxe2H+C5bft5bnszz23bz/6OXgBKCvM4ZnIVF544leOmVjFvShXHTK6itEg3xYlkKyURGZZdBzp5evM+nn6liZVbm9nQ0Prq+MXciRW8c94kFsysYf7MauZOrNQUGiI5RklEhqS+uZOnNzfx581NPP3KPrY2dQDB9Bwnz6rhghOmcNLMak6cUc24Ug1yi+Q6JRFJqLWrlz+93MQTGxr5w8a9bNsXJI1xpYUsmlPLR0+fzalzajl2SpVaGSJjkJKIvE487qzZ2cITGxt5fH0jK7ftJxZ3yovyOf3ICVx5xmxOnTOeYyZXjokFd0QkMSURoau3j6debuLhNbv53bo97G3rAeC4qVUsOesIznpTHQtm1uhObhF5AyWRMaq9O8bv1u3ht2v38NhLDbT39FFRXMDZR9dx7rETOfOoOuoqi6MOU0QynJLIGNITi/PEhkbue2Enj6zdTVdvnAkVxbxv/jTOP24Spx85XmtQiMiQKInkOHdn+db9/OK5eh5atYvmjl5qygr5wMnTuWj+NE6eWaOxDRFJmZJIjtrb1s3PV+zgrme3s3lvO6WF+Zx33CQunj+NM+dOoFB3g4vICFASySHxuPPkpr3c+ew2Hlm7h94+Z+GsGv7+7UdxwQmTKSvS1y0iI0tnlRzQ2dPHz1fu4LY/vsLLje3UlBVyxemzWbxoBkdNrIw6PBHJYUoiWWz3gS5uf2oLP31mG80dvRw/rYrvXnoiF5wwRQPkIjIqlESy0I79Hdy07GXuWbGdvrhz3rzJXPXWOSycVaN1vEVkVCmJZJHt+zr4n8c2cc+KHRjG4lNmsuSsI5hRWxZ1aCIyRimJZIF97T385+828JOnt5FnxmWLZvLxs49kyrjSqEMTkTEuqSRiZguAMwEH/ujuK9MalQDBzYE/emoLNzy6kY6ePhafMoNPnnOUkoeIZIxBk4iZ/RvwQeDesOg2M7vb3b+c1sjGuN+/tIcv/Wodr+xt56w31XHde45l7iRdaSUimSWZlshfAie6exeAmX0NeB5QEkmDhtYu/v3+tTy4ahdH1pVz25Wn8PajJ0YdlojIISWTRHYCJUBX+LoYqB/OQc3sH4C/JugeWwVcCUwB7gTGAyuAj7h7z3COk03cnbue3c5/PLSOrlicz573JpacdaRmzhWRjJZMEjkArDGzRwhO+u8EnjGzGwHc/eqhHNDMpgFXA/PcvdPMfgYsBi4Avuvud5rZzcBVwPeG8tnZamdzJ5+9+wX+9HITp86p5auXnMARdRVRhyUiMqhkksgvwke/x0bouKVm1guUAbuAc4DLw/dvB65nDCSR+56v57pfriYWd756yQksPmWG7vUQkawxaBJx99vNrAg4hqAlsn443UzuXm9m3wK2AZ3Abwm6r5rdPRbutgOYluoxssGBzl6u++Vq7n9hJwtmVvPdS+cza3x51GGJiAxJMldnXQB8H3gZ
MGCOmf2tu/86lQOaWQ1wETAHaAbuBt41hJ9fAiwBmDlzZiohRG51/QE+fscKdjZ38Zl3vomPn30kBZpVV0SyUDLdWd8B3u7umwDM7EjgQSClJAK8A3jF3RvDz7sXOAOoNrOCsDUyncMM3rv7UmApwMKFCz3FGCLh7vz0me1c/8AaxpcX8bO/PZ2TZ9VEHZaISMqSSSKt/QkktBloHcYxtwGnmVkZQXfWucByYBnwAYIrtK4A7hvGMTJOV28f//KLVdy7sp63zp3ADYtPora8KOqwRESGJZkkstzMHgJ+RjAm8kHgWTO7BMDd7030wwdz96fN7B5gJRADniNoWTwI3GlmXw7LbhnK52ayhpYu/uZHy3mx/gCffsdcPnXOXPK1mqCI5ABzT9wjZGa3JXjb3f1jIxtS8hYuXOjLly+P6vBJWbuzhb++/Vn2d/Ryw+L5nHfc5KhDEpExzsxWuPvCkfisZK7OunIkDjQWLXupgU/8ZCVVJYXc/Xenc/y0cVGHJCIyopK5OquE4Ma/4wjuXAcgyhZINvjlc/V85u4XOHZKJbdccQqTqkoG/yERkSyTzHWlPwYmA+cDjxNcOTWcgfWc98M/vsKn73qeRbNruXPJ6UogIpKzkkkiR7n7dUC7u98OvAc4Nb1hZa///v1Grn9gLefNm8RtV55CRbGWbBGR3JVMEukNn5vN7HhgHKBpZQ/hpmWb+NZvN3DJSdP4n79cQEmh1jkXkdyWzJ/JS8O7zP8VuB+oAK5La1RZ6ObHX+abD6/n/SdN45sfPFGX8IrImJDM1Vk/CDefAI5IbzjZ6X+f2MzXfv0S7ztxKt9SAhGRMUQTNg3Tnc9s4ysPrePCN0/hOx9SAhGRsUVJZBgeXbeHL/xyNWcfXcd3L52vSRRFZMzRWS9Fz29v5pM/eY55U6q46fIFFCqBiMgYNOiZz8w+aGaV4fa/mtm9ZrYg/aFlroaWLpb8aDkTKou49a9OoVyX8YrIGJXMn8/XuXurmZ1JMI37LYyBFQcPpycW5+N3rKS1K8b/fnQhdZXFUYckIhKZZJJIX/j8HmCpuz8IjNk5zL/0q7Ws2Lqfb37wzRwzuSrqcEREIpVMEqk3s+8DlwIPmVlxkj+Xc+5/YSc//vNWlpx1BBe+eWrU4YiIRC6ZZPAh4GHgfHdvBmqBf0pnUJlo+74OvnDvKhbMrOZz5x8ddTgiIhnhsCPCZlbl7i0EM/c+FpbVAt0EKxGOGbG+OJ++63kAblh8ki7lFREJJbqs6CfAhcAKghUNB95F54yhu9dvWvYyK7bu54bF85lRWxZ1OCIiGeOwScTdLwyf54xeOJlnU0Mr/71sI+89cSoXzZ8WdTgiIhklqRsczGwaMGvg/u7+RLqCyhTxuPMv966mrKiAL753XtThiIhknGRWNvw6wZVZa3ntcl8nmJAxp929YjvPbNnH1//iBCZU6H4QEZGDJdMSuRg42t270xxLRmlq6+Y/HnqJRbNr+eDJM6IOR0QkIyVzmdFmoDDdgWSa7zyygbbuGF95//HkaWZeEZFDSnSJ738RdFt1AM+b2aMEl/cC4O5Xpz+8aKzf3cpPn9nGR0+fzdxJlVGHIyKSsRJ1Z/XfC7KCYEXDgTw94UTP3fnyg2upLCnkmnPnRh2OiEhGS3SJ7+0AZnaNu98w8D0zuybdgUXlsfWN/GHjXq67cB415WN2ijARkaQkMyZyxSHK/mqE48gIfXHnq79ex5wJ5XzktFlRhyMikvESjYlcBlwOzDGzgd1ZlcC+dAcWhV+9uJMNe9q46fIFFBVoahMRkcEkGhP5E7ALmAB8e0B5K/BiOoOKQqwvzg2/28gxkyt59/GTow5HRCQrJBoT2QpsBU4fvXCic/8LO9m8t52bP3yyLukVEUmS+mwIWyGPbuS4qVWcf9ykqMMREckaSiLAg6t2sbWpg2vOnYuZWiEiIska80nE3bn58c0cNbGCdxyrVoiIyFAMmkTM7Awze8TMNpjZZjN7
xcw2j0Zwo+EPG/eyblcLS846QmMhIiJDlMwEjLcA/0Bw53rfIPsmxcyqgR8AxxPc/f4xYD1wFzAb2AJ8yN33j8TxEvn+Ey8zqaqYi+ZrzXQRkaFKpjvrgLv/2t0b3L2p/zHM494A/MbdjwFOBNYB1wKPuvtc4NHwdVqtrj/AHzc1ceUZcyguyE/34UREck6imw0XhJvLzOybwL28fgLGlakc0MzGAWcR3vXu7j1Aj5ldBJwd7nY7wbrun0/lGMn68VNbKS3M57JFM9N5GBGRnJWoO+vbB71eOGDbgXNSPOYcoBG4zcxOJOgmuwaY5O67wn12A4cc5TazJcASgJkzUz/5H+jo5b4X6nn/SdMYVzrmZroXERkRiW42fDuAmR3h7q8bSDezI4Z5zAXAp9z9aTO7gYO6rtzdzeyQMwW7+1JgKcDChQtTnk34npU76OqN82HNkSUikrJkxkTuOUTZ3cM45g5gh7s/PeDzFwB7zGwKQPjcMIxjJOTu3PHnrSyYWc1xU8el6zAiIjkv0ZjIMcBxwDgzu2TAW1VASaoHdPfdZrbdzI529/XAuQTrt68lmDH4a+HzfakeYzBPbW5i8952vvOhE9N1CBGRMSHRmMjRwIVANfDeAeWtwN8M87ifAu4wsyKC5XevJGgV/czMriKYs+tDwzzGYd27sp7K4gIuOGFKug4hIjImJBoTuQ+4z8xOd/enRvKg7v48rx+o73fuSB7nUDp7+vjN6t1ccMJkSgp1Wa+IyHAk6s76nLt/A7g8XFvkdbJ1jfVH1u2hrTvGxSdNizoUEZGsl6g7a134vDzBPlnnl8/VM2VcCafNGR91KCIiWS9Rd9YD4eaT7v7yKMWTVk1t3Ty+oZG/eavmyRIRGQnJzJ11q5lNB54F/gA84e6r0htWevxu3R764s57T9SAuojISBg0ibj728KrqE4hmJbkQTOrcPfadAc30h5es4fpNaXMm1IVdSgiIjlh0CRiZmcCbw0f1cCvCFokWaWtO8aTG/fy4dNmaeEpEZERkkx31mME81t9FXgonDAx6zy+vpGevriWvxURGUHJJJEJwBkEM+9ebWZx4Cl3vy6tkY2wh9fspra8iIWzs64XTkQkYyUzJtIcrmQ4A5gOvAXIqmlve/viLFvfwLuPn0y+rsoSERkxyYyJbAZeIhgH+R5wZbZ1aT23rZnWrhjnHDMx6lBERHJKMt1ZR7l7PO2RpNHjGxrIzzPectSEqEMREckpg04Fn+0JBOCJDXtZMLOaqpKs6oUTEcl4yawnktX2tnWzqv4Ab3tTXdShiIjknJxPIn/Y2AjA296k8RARkZE2aBIxs2vMrMoCt5jZSjM7bzSCGwmPr29kfHkRx03VXeoiIiMtmZbIx9y9BTgPqAE+QrD6YMZzd/6wcS9vnTtBEy6KiKRBMkmk/+x7AfBjd18zoCyjbWpoo6m9h7ccqauyRETSIZkkssLMfkuQRB42s0ogK67YembLPgAWzdFd6iIi6ZDMfSJXAfOBze7eYWbjCdZEz3jPvLKPuspiZo0vizoUEZGclExLxIF5QP9yuOVASdoiGiHuzjOv7GPRnFrN2isikibJJJH/AU4H+tdZbwVuSltEI2TH/k52HehikSZcFBFJm2S6s0519wVm9hyAu+8PF6nKaM9qPEREJO2SaYn0mlk+QbcWZlZHFgysr9i6n8riAo6eVBl1KCIiOSuZJHIj8Atgopl9BXgS+I+0RjUCVtUf4ITp43R/iIhIGiWznsgdZrYCOJfg/pCL3X1d2iMbhu5YH+t2tfCxM+dEHYqISE5LZj2R04A17n5T+LrKzE5196fTHl2K1u9upbfPefO06qhDERHJacl0Z30PaBvwui0sy1gv7jgAwJunj4s4EhGR3JbUtCfu7v0vwvVFkrmqKzKrdhygpqyQ6TWlUYciIpLTkkkim83sajMrDB/XAJvTHdhwvLS7hWOnVOkmQxGRNEsmifwd8BagHtgBnAosSWdQwxGP
Oxv2tHH0ZF3aKyKSbslcndUALB6FWEbE9v0ddPb26f4QEZFRcNgkYmafc/dvmNl/Ed5oOJC7X32IH4vc+t2tAGqJiIiMgkQtkf57QZan48DhXfDLgXp3v9DM5gB3AuOBFcBH3L1nqJ+7YU+QROaqJSIiknaHTSLu/kC4ucrdV6bh2NcQJKr+dWu/DnzX3e80s5sJpqAf8qXEL+1uZUZtKRXFGX0BmYhITkhmYP3bZrbOzL5kZsePxEHNbDrwHuAH4WsDzgHuCXe5Hbg4lc/e1NDG3IlqhYiIjIZBk4i7vx14O9AIfN/MVpnZvw7zuP8JfI7XJnIcDzS7eyx8vQOYdqgfNLMlZrbczJY3NjYeHCtbmzqYPb58mOGJiEgykmmJ4O673f1Ggst9nwf+LdUDmtmFQIO7r0jl5919qbsvdPeFdXV1r3uvsbWbzt4+Zk/QSoYiIqMhmbmzjgUuBT4A7AXuAj4zjGOeAbzPzC4gWCGxCrgBqDazgrA1Mp3gvpQh2dLUAcAstUREREZFMi2RW4H9wHnufra7fy+8dyQl7v7P7j7d3WcT3H/ye3f/S2AZQaICuAK4b6ifvaWpHYA5SiIiIqMimTGR04GlQLpHqz8P/KOZbSIYI7llqB+wtamdgjxjanXGLwEvIpITBk0iZvZegnGQ34Sv55vZ/SNxcHd/zN0vDLc3u/sidz/K3T/o7t1D/bwtezuYUVtGQX5SQz0iIjJMyZxtrwcWAc0A7v48kJGrPW1pamfWeA2qi4iMlqTWWHf3AweVvWEalKjp8l4RkdGXzG3da8zsciDfzOYCVwN/Sm9YQ9fU3kNbd0wtERGRUZRMS+RTwHFAN/BToAX4dBpjSsnW8MostUREREZPMlPBdwBfCB8Za9u+4B6RGbVqiYiIjJZEU8E/QIKxD3d/X1oiSlH9/k4ALYkrIjKKErVEvhU+XwJMBv4vfH0ZsCedQaWivrmL8eVFlBTmRx2KiMiYkWgq+McBzOzb7r5wwFsPmFla1hgZjp3NnUytVitERGQ0JTOwXm5mR/S/CBePyrjR6yCJ6E51EZHRlMwlvv8APGZmmwEDZgFL0hrVELk7O5s7OXPuhKhDEREZU5K5Ous34f0hx4RFL6UyJUk6tXTGaO/pY5q6s0RERlVSa8iGSeOFNMeSsh3NweW9SiIiIqMrJ2Yq3NncBaCBdRGRUXbYJGJmZ4TPxaMXTmp2Ngf3iCiJiIiMrkQtkRvD56dGI5Dh2NncSVFBHuPLi6IORURkTEk0JtJrZkuBaWZ248FvuvvV6QtraOqbO5k6roS8PIs6FBGRMSVRErkQeAdwPrBidMJJjW40FBGJRqI71vcCd5rZOnfP2CuzABpauzlldm3UYYiIjDnJXJ3VZGa/MLOG8PFzM5ue9siS5O40tnZTV5nx4/8iIjknmSRyG3A/MDV8PBCWZYSWrhjdsTh1FUoiIiKjLZkkMtHdb3P3WPj4IVCX5riS1tga3Dw/sUpJRERktCWTRPaa2YfNLD98fBhoSndgyWpoDW40VEtERGT0JZNEPgZ8CNgN7AI+AFyZzqCGQi0REZHoJDMB41Ygo1YxHKg/idRVaBp4EZHRlvVzZzW2dlNUkEdVaVJzSYqIyAjKiSRSV1GMme5WFxEZbVmfRBp0j4iISGSSTiJmdpqZ/cbMHjOzi9MY05A0tnYzUUlERCQSiaaCn3xQ0T8C7wcuAL6UzqCGoqG1Sy0REZGIJBqNvtnMVgLfcPcuoJng8t440DIKsQ3KHfZ39DKxUldmiYhE4bAtEXe/GHgO+JWZfRT4NFAMjAcuHoXYBhWLxwGYUKl1REREopBwTMTdHyCYCn4c8Atgg7vf6O6NoxHcYPriDkBtmZKIiEgUEo2JvM/MlgG/AVYDlwIXmdmdZnZkqgc0sxlmtszM1prZGjO7JiyvNbNHzGxj+Fwz2Gf1J5FxZYWphiMiIsOQqCXyZeDdBFOefN3dm939M8B1wFeGccwY8Bl3nwec
BnzCzOYB1wKPuvtc4NHwdUL9SaS6VC0REZEoJBpYPwBcApQBDf2F7r4RWJzqAd19F8EcXLh7q5mtA6YBFwFnh7vdDjwGfD7RZ8X6k4haIiIikUjUEnk/wSB6AXB5Og5uZrOBk4CngUlhgoFgssdJh/mZJWa23MyWt7S2AVCjMRERkUgMtjzuf6XrwGZWAfwc+LS7twyctsTd3cz8MHEtBZYCTJ97vBcV5FFSmPU33ouIZKVIzr5mVkiQQO5w93vD4j1mNiV8fwoDutAOpy/uVJcWat4sEZGIjHoSseCMfwuwzt2/M+Ct+4Erwu0rgPsG+6xY3DUeIiISoSjmTz8D+AiwysyeD8v+Bfga8DMzuwrYSnBVWEJBS0TjISIiURn1JOLuTwKH6386dyif1edOZYnWERERiUpWj0i7O6VF+VGHISIyZmV1EonHobxILRERkahkdRLpU0tERCRSWZ1E3J0yJRERkchkdxIBJRERkQhldRIBKNOYiIhIZHIgiaglIiISlaxPIhpYFxGJTtYnEXVniYhEJ+uTSGmhWiIiIlHJ+iRSkK8ZfEVEopL1SaRQSUREJDJZn0QK8rK+CiIiWSvrz8DqzhIRiU72JxG1REREIpP1Z2C1REREopP1SaRQLRERkchk/RlYLRERkegoiYiISMqyPomoO0tEJDpZfwbOV0tERCQyWZ9E1BIREYlO1p+BNSYiIhKd7E8ieUoiIiJRyeokYoCZkoiISFSyOomg/CEiEqmsTiKmLCIiEqmsTiLjSgujDkFEZEzL6iQyvaY06hBERMa0rE4iIiISLSURERFJmZKIiIikLKOSiJm9y8zWm9kmM7s26nhERCSxjEkiZpYP3AS8G5gHXGZm86KNSkREEsmYJAIsAja5+2Z37wHuBC6KOCYREUkgk5LINGD7gNc7wjIREclQmZREkmJmS8xsuZktb2xsjDocEZExrSDqAAaoB2YMeD09LHsdd18KLAUws1YzWz864UViArA36iDSKJfrl8t1A9Uv2x09Uh+USUnkWWCumc0hSB6LgcsH+Zn17r4w7ZFFxMyWq37ZKZfrBqpftjOz5SP1WRmTRNw9ZmafBB4G8oFb3X1NxGGJiEgCGZNEANz9IeChqOMQEZHkZN3A+kGWRh1Amql+2SuX6waqX7YbsfqZu4/UZ4mIyBiT7S0RERGJkJKIiIikLGuTSC5M1mhmW8xslZk933/JnZnVmtkjZrYxfK4Jy83Mbgzr+6KZLYg2+jcys1vNrMHMVg8oG3J9zOyKcP+NZnZFFHU5lMPU73ozqw+/w+fN7IIB7/1zWL/1Znb+gPKM+901sxlmtszM1prZGjO7JizPie8vQf1y5fsrMbNnzOyFsH7/HpbPMbOnw1jvMrOisLw4fL0pfH/2gM86ZL0Py92z7kFwCfDLwBFAEfACMC/quFKoxxZgwkFl3wCuDbevBb4ebl8A/Bow4DTg6ajjP0R9zgIWAKtTrQ9QC2wOn2vC7Zqo65agftcDnz3EvvPC38tiYE74+5qfqb+7wBRgQbhdCWwI65AT31+C+uXK92dARbhdCDwdfi8/AxaH5TcDHw+3/x64OdxeDNyVqN6Jjp2tLZFcnqzxIuD2cPt24OIB5T/ywJ+BajObEkF8h+XuTwD7Dioean3OBx5x933uvh94BHhX2oNPwmHqdzgXAXe6e7e7vwJsIvi9zcjfXXff5e4rw+1WYB3B3HU58f0lqN/hZNv35+7eFr4sDB8OnAPcE5Yf/P31f6/3AOeamXH4eh9WtiaRXJms0YHfmtkKM1sSlk1y913h9m5gUridrXUean2ysZ6fDLt0bu3v7iGL6xd2bZxE8Ndszn1/B9UPcuT7M7N8M3seaCBI3i8Dze4eC3cZGOur9QjfPwCMJ4X6ZWsSyRVnuvsCgjVUPmFmZw1804P2Zc5cg51r9Ql9DzgSmA/sAr4daTTDZGYVwM+BT7t7y8D3cuH7O0T9cub7c/c+d59PMO/gIuCY
0ThutiaRpCZrzHTuXh8+NwC/IPji9/R3U4XPDeHu2VrnodYnq+rp7nvC/7xx4H95remfdfUzs0KCE+wd7n5vWJwz39+h6pdL318/d28GlgGnE3Qz9s9MMjDWV+sRvj8OaCKF+mVrEnl1ssbwaoPFwP0RxzQkZlZuZpX928B5wGqCevRf0XIFcF+4fT/w0fCqmNOAAwO6GTLZUOvzMHCemdWEXQvnhWUZ6aBxqfcTfIcQ1G9xeBXMHGAu8AwZ+rsb9offAqxz9+8MeCsnvr/D1S+Hvr86M6sOt0uBdxKM+ywDPhDudvD31/+9fgD4fdjSPFy9Dy/qqwpSfRBcHbKBoN/vC1HHk0L8RxBcBfECsKa/DgT9ko8CG4HfAbX+2tUXN4X1XQUsjLoOh6jTTwm6BHoJ+lKvSqU+wMcIBvQ2AVdGXa9B6vfjMP4Xw/+AUwbs/4WwfuuBd2fy7y5wJkFX1YvA8+Hjglz5/hLUL1e+vzcDz4X1WA38W1h+BEES2ATcDRSH5SXh603h+0cMVu/DPTTtiYiIpCxbu7NERCQDKImIiEjKlERERCRlSiIiIpIyJREREUmZkoiMOWZ2sZm5mY3KHb3pYmbVZvb3UcchY5uSiIxFlwFPhs/ZrJpgNlaRyCiJyJgSzp10JsGNgosHlJ9tZk+Y2YPhOgo3m1le+F6bmX03XKfhUTOrC8uvtmB9ihfN7M5DHCvfzL5lZqvDfT4Vlp9rZs9ZsJbMrWZWHJZvMbMJ4fZCM3ss3L4+3O8xM9tsZleHh/gacKQF62B8M13/ZiKJKInIWHMR8Bt33wA0mdnJA95bBHyKYE2FI4FLwvJyYLm7Hwc8DnwxLL8WOMnd3wz83SGOtQSYDcwP97nDzEqAHwKXuvsJQAHw8STiPoZgmvVFwBfDeaCuBV529/nu/k/JVF5kpCmJyFhzGcEaEITPA7u0nvFgnYg+gilOzgzL48Bd4fb/DSh/kSAxfBiI8UbvAL7v4VTc7r4POBp4JUxiEKzpcNYhfvZgD3qwxsNegkkQJw32AyKjoWDwXURyg5nVEizSc4KZOcEqdW5m/X/FHzwH0OHmBOovfw9BAngv8AUzO8FfW7shFTFe+8Ou5KD3ugds96H/u5Ih1BKRseQDwI/dfZa7z3b3GcArwFvD9xeFs7PmAZcSDL5D8P+kfybUy4Enw31muPsy4PMEU2lXHHS8R4C/7Z+KO0xi64HZZnZUuM9HCLrIIFguub977S+SqE8rwVKvIpFREpGx5DKCdVsG+jmvdWk9C/w3wRTarwzYt50gwawmaMn8P4JWzP+Z2SqC2VNv9GAdh4F+AGwDXjSzF4DL3b0LuBK4O/zZOMHa1wD/DtxgZssJWhsJuXsT8Mdw4F4D6xIJzeIrQnB1FvBZd7/wEO+1ufvBrQwRQS0REREZBrVEREQkZWqJiIhIypREREQkZUoiIiKSMiURERFJmZKIiIik7P8DbNXQ1etIVbQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total apps: 19237 Total active apps: 10582\n"
     ]
    }
   ],
   "source": [
    "# Cumulative share of the per-app unique-device counts, most popular app first.\n",
    "aps = apps_popularity.cumsum() / apps_popularity.sum()\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(np.arange(1, len(aps) + 1), aps.to_numpy() * 100)\n",
    "# Put the title on the figure itself instead of printing it to stdout.\n",
    "ax.set(title='Devices per app', xlabel='Apps count', ylabel='% of devices with this app', xlim=(0, 3000))\n",
    "plt.show()\n",
    "\n",
    "print(\"Total apps:\", app_events.app_id.nunique(), \"Total active apps:\", active_apps.app_id.nunique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 每台设备上的app情况\n",
    "\n",
    "* 60669 devices have active apps\n",
    "* max number of apps on a single device is 1342\n",
    "* 75% of devices contain at most 21 apps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:47:47.093933Z",
     "iopub.status.busy": "2021-07-24T13:47:47.093576Z",
     "iopub.status.idle": "2021-07-24T13:47:52.579812Z",
     "shell.execute_reply": "2021-07-24T13:47:52.578795Z",
     "shell.execute_reply.started": "2021-07-24T13:47:47.093898Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "gd = active_apps.groupby(['device_id'])\n",
    "apps_per_device = gd.app_id.nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:47:54.150066Z",
     "iopub.status.busy": "2021-07-24T13:47:54.149665Z",
     "iopub.status.idle": "2021-07-24T13:47:54.164923Z",
     "shell.execute_reply": "2021-07-24T13:47:54.164122Z",
     "shell.execute_reply.started": "2021-07-24T13:47:54.150006Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    60669.000000\n",
       "mean        15.006972\n",
       "std         15.086517\n",
       "min          1.000000\n",
       "25%          5.000000\n",
       "50%         12.000000\n",
       "75%         21.000000\n",
       "max       1342.000000\n",
       "Name: app_id, dtype: float64"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "apps_per_device.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:47:56.358913Z",
     "iopub.status.busy": "2021-07-24T13:47:56.358582Z",
     "iopub.status.idle": "2021-07-24T13:47:56.366043Z",
     "shell.execute_reply": "2021-07-24T13:47:56.364939Z",
     "shell.execute_reply.started": "2021-07-24T13:47:56.358881Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apps with at most 21 apps: 75.7\n"
     ]
    }
   ],
   "source": [
    "# Share of devices (in %) that have at most 21 distinct active apps.\n",
    "print(\"Devices with at most 21 apps:\", round(len(apps_per_device[apps_per_device<=21])*100.0/len(apps_per_device), 1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "画一下75%的手机设备上的app安装数量分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:48:08.777338Z",
     "iopub.status.busy": "2021-07-24T13:48:08.776964Z",
     "iopub.status.idle": "2021-07-24T13:48:08.973739Z",
     "shell.execute_reply": "2021-07-24T13:48:08.972703Z",
     "shell.execute_reply.started": "2021-07-24T13:48:08.777300Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWyklEQVR4nO3df7Bc5X3f8fc3yBgGubqivlZV0FS4yMpgNHZAA6R2PVemFQJnLNpxGDxMEJiOJlOcsWeUKXJTBxfjKW5MPHYnoVWNGuG4gVsSigbjEFVG4/EfwlgYI37YkcCi5o4sNZYsIltxKvfbP/a5mdXVXu1ZaXfv2s/7NbNzz3nOc/Z8z9lzP3vuOWf3RmYiSarDL811AZKk4TH0Jakihr4kVcTQl6SKGPqSVJF5c13AqYyNjeXFF18812Wc5Mc//jHnnXfeXJdxEuvq3ajWZl29sa4T7dq1668yc7zjxMwc2cfb3va2HEVPPvnkXJfQkXX1blRrs67eWNeJgG/mLLnq6R1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SapIo69hiIgx4AvApUACHwK+CzwELAX2ATdk5uGICOBzwHXAT4BbMvOZ8jzrgH9XnvbuzNzSrxXpl6Ubv9y1z4YVx7llRr9997xvUCVJUt80PdL/HPDnmfnLwDuAl4CNwPbMXAZsL+MA1wLLymM9cB9ARJwP3AlcCVwB3BkRC/u0HpKkBrqGfkQsAN4D3A+QmX+bmT8C1gLTR+pbgOvL8FrggfIVEDuBsYhYDFwDbMvMQ5l5GNgGrOnjukiSumhypH8R8H+A/xYR34qIL0TEecCizNxf+vwAWFSGLwC+3zb/a6VttnZJ0pBEdvnH6BGxEtgJvCszn4qIzwGvA7+VmWNt/Q5n5sKIeAy4JzO/Xtq3A3cAE8A5mXl3af84cCwzPzNjeetpnRZifHz88snJyb6saFO7p4507bPoXDhw7MS2FRcsGFBFzR09epT58+fPdRknGdW6YHRrs67eWNeJVq1atSszV3aa1uRC7mvAa5n5VBl/mNb5+wMRsTgz95fTNwfL9ClgSdv8F5a2KVrB396+Y+bCMnMTsAlg+fLlOTExMbPLQM28QNvJhhXHuXf3iZtu300TA6qouR07djDs7dXEqNYFo1ubdfXGuprrenonM38AfD8ilpemq4EXga3AutK2Dni0DG8Fbo6Wq4Aj5TTQE8DqiFhYLuCuLm2SpCFp+p+zfgv4UkScDbwC3ErrDWMyIm4DXgVuKH0fp3W75l5at2zeCpCZhyLik8DTpd9dmXmoL2shSWqkUehn5rNAp/NDV3fom8DtszzPZmBzD/VJkvrIT+RKUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUahX5E7IuI3RHxbER8s7SdHxHbImJP+bmwtEdEfD4i9kbEcxFxWdvzrCv990TEusGskiRpNr0c6a/KzHdm5soyvhHYnpnLgO1lHOBaYFl5rAfug9abBHAncCVwBXDn9BuFJGk4zuT0zlpgSxneAlzf1v5AtuwExiJiMXANsC0zD2XmYWAbsOYMli9J6lFkZvdOEd8DDgMJ/JfM3BQRP8rMsTI9gMOZORYRjwH3ZObXy7TtwB3ABHBOZt5d2j8OHMvMz8xY1npafyEwPj5++eTkZF9WtKndU0e69ll0Lhw4dmLbigsWDKii5o4ePcr8+fPnuoyTjGpdMLq1WVdvrOtEq1at2tV2VuYE8xo+x7szcyoi3gJsi4jvtE/MzIyI7u8eDWTmJmATwPLly3NiYqIfT9vYLRu/3LXPhhXHuXf3iZtu300TA6qouR07djDs7dXEqNYFo1ubdfXG
upprdHonM6fKz4PAI7TOyR8op20oPw+W7lPAkrbZLyxts7VLkoaka+hHxHkR8abpYWA18DywFZi+A2cd8GgZ3grcXO7iuQo4kpn7gSeA1RGxsFzAXV3aJElD0uT0ziLgkdZpe+YB/z0z/zwingYmI+I24FXghtL/ceA6YC/wE+BWgMw8FBGfBJ4u/e7KzEN9WxNJUlddQz8zXwHe0aH9h8DVHdoTuH2W59oMbO69TElSP/iJXEmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqSOPQj4izIuJbEfFYGb8oIp6KiL0R8VBEnF3a31jG95bpS9ue42Ol/bsRcU3f10aSdEq9HOl/BHipbfzTwGcz82LgMHBbab8NOFzaP1v6ERGXADcCbwfWAH8YEWedWfmSpF40Cv2IuBB4H/CFMh7Ae4GHS5ctwPVleG0Zp0y/uvRfCzyYmT/NzO8Be4Er+rAOkqSGIjO7d4p4GPgPwJuA3wZuAXaWo3kiYgnwlcy8NCKeB9Zk5mtl2svAlcAnyjx/XNrvL/M8PGNZ64H1AOPj45dPTk72YTWb2z11pGufRefCgWMntq24YMGAKmru6NGjzJ8/f67LOMmo1gWjW5t19ca6TrRq1apdmbmy07R53WaOiF8DDmbmroiY6HNtJ8nMTcAmgOXLl+fExMAXeYJbNn65a58NK45z7+4TN92+myYGVFFzO3bsYNjbq4lRrQtGtzbr6o11Ndc19IF3Ae+PiOuAc4C/B3wOGIuIeZl5HLgQmCr9p4AlwGsRMQ9YAPywrX1a+zySpCHoek4/Mz+WmRdm5lJaF2K/mpk3AU8CHyjd1gGPluGtZZwy/avZOoe0Fbix3N1zEbAM+Ebf1kSS1FWTI/3Z3AE8GBF3A98C7i/t9wNfjIi9wCFabxRk5gsRMQm8CBwHbs/Mn53B8kfK0ganhTrZd8/7+lyJJM2up9DPzB3AjjL8Ch3uvsnMvwF+fZb5PwV8qtciJUn94SdyJakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0JakiXUM/Is6JiG9ExLcj4oWI+Pel/aKIeCoi9kbEQxFxdml/YxnfW6YvbXuuj5X270bENQNbK0lSR02O9H8KvDcz3wG8E1gTEVcBnwY+m5kXA4eB20r/24DDpf2zpR8RcQlwI/B2YA3whxFxVh/XRZLURdfQz5ajZfQN5ZHAe4GHS/sW4PoyvLaMU6ZfHRFR2h/MzJ9m5veAvcAV/VgJSVIzkZndO7WOyHcBFwN/APwesLMczRMRS4CvZOalEfE8sCYzXyvTXgauBD5R5vnj0n5/mefhGctaD6wHGB8fv3xycrIf69nY7qkjXfssOhcOHOvP8lZcsKA/TwQcPXqU+fPn9+35+mVU64LRrc26emNdJ1q1atWuzFzZadq8Jk+QmT8D3hkRY8AjwC/3r7yTlrUJ2ASwfPnynJiYGNSiOrpl45e79tmw4jj37m606brad9NEX54HYMeOHQx7ezUxqnXB6NZmXb2xruZ6unsnM38EPAn8KjAWEdPJdyEwVYangCUAZfoC4Ift7R3mkSQNQZO7d8bLET4RcS7wz4GXaIX/B0q3dcCjZXhrGadM/2q2ziFtBW4sd/dcBCwDvtGn9ZAkNdDkHMViYEs5r/9LwGRmPhYRLwIPRsTdwLeA+0v/+4EvRsRe4BCtO3bIzBciYhJ4ETgO3F5OG0mShqRr6Gfmc8CvdGh/hQ5332Tm3wC/PstzfQr4VO9lSpL6wU/kSlJFDH1Jqoih
L0kV6c/N5jptSxt8LqCTffe8r8+VSKqBR/qSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JF/O6dyvhdP1LdDP2fU53Ce8OK443+sbukehn6auR0/kLwrwNp9HhOX5Iq4pG+Ro7XHaTB8Uhfkirikb4GZvqI3QvM0ujwSF+SKuKRvn5hnM61gA0rjjPR/1KkkeWRviRVpGvoR8SSiHgyIl6MiBci4iOl/fyI2BYRe8rPhaU9IuLzEbE3Ip6LiMvanmtd6b8nItYNbrUkSZ00OdI/DmzIzEuAq4DbI+ISYCOwPTOXAdvLOMC1wLLyWA/cB603CeBO4ErgCuDO6TcKSdJwdA39zNyfmc+U4b8GXgIuANYCW0q3LcD1ZXgt8EC27ATGImIxcA2wLTMPZeZhYBuwpp8rI0k6tcjM5p0jlgJfAy4F/ndmjpX2AA5n5lhEPAbck5lfL9O2A3cAE8A5mXl3af84cCwzPzNjGetp/YXA+Pj45ZOTk2eyfj3bPXWka59F58KBY0MopkfW1btF58Jbzl8w12Wc5OjRo8yfP3+uyziJdfVmrupatWrVrsxc2Wla47t3ImI+8KfARzPz9VbOt2RmRkTzd49TyMxNwCaA5cuX58TERD+etrEm95NvWHGce3eP3o1P1tW7DSuOc8Np7mOD/OTwjh07GPa+34R19WYU62r0mxgRb6AV+F/KzD8rzQciYnFm7i+nbw6W9ilgSdvsF5a2KTjh7rgLgR2nX7r086nJm8XMD7T5FRPqlyZ37wRwP/BSZv5+26StwPQdOOuAR9vaby538VwFHMnM/cATwOqIWFgu4K4ubZKkIWlypP8u4DeA3RHxbGn7t8A9wGRE3Aa8CtxQpj0OXAfsBX4C3AqQmYci4pPA06XfXZl5qB8rIUlqpmvolwuyMcvkqzv0T+D2WZ5rM7C5lwIlSf0zmlfX+uR0L7RJo2bY+7LXEH5x+TUMklQRQ1+SKmLoS1JFfqHP6Us6PbNdQ+j2D3G8FjD6PNKXpIoY+pJUEU/vSOqbQX4fkfrDI31JqoihL0kVMfQlqSKe05f0c2v6GkK3W0nb1X79wCN9SaqIoS9JFfH0jqQ55zfiDo9H+pJUEY/0JVWl9g+QGfqS1MDpvFlsWHGcif6XckY8vSNJFTH0Jakihr4kVcRz+pI0QKN24dgjfUmqiKEvSRUx9CWpIl1DPyI2R8TBiHi+re38iNgWEXvKz4WlPSLi8xGxNyKei4jL2uZZV/rviYh1g1kdSdKpNDnS/yNgzYy2jcD2zFwGbC/jANcCy8pjPXAftN4kgDuBK4ErgDun3ygkScPTNfQz82vAoRnNa4EtZXgLcH1b+wPZshMYi4jFwDXAtsw8lJmHgW2c/EYiSRqwyMzunSKWAo9l5qVl/EeZOVaGAzicmWMR8RhwT2Z+vUzbDtwBTADnZObdpf3jwLHM/EyHZa2n9VcC4+Pjl09OTp72yu2eOnLa857KonPhwLGBPPUZsa7eLToX3nL+gtOad1D7F4zuNrOu3pxJXSsuOL39EmDVqlW7MnNlp2lnfJ9+ZmZEdH/naP58m4BNAMuXL8+JiYnTfq6m/0mnVxtWHOfe3aP3EQfr6t2GFce54TT3sUHtXzC628y6enMmde27aaK/xRSne/fOgXLahvLzYGmfApa09buwtM3WLkkaotMN/a3A9B0464BH29pvLnfxXAUcycz9wBPA6ohYWC7gri5tkqQh6vp3R0T8Ca1z8m+OiNdo3YVzDzAZEbcBrwI3lO6PA9cBe4GfALcCZOahiPgk8HTpd1dmzrw4LEkasK6hn5kfnGXS1R36JnD7LM+zGdjcU3WSpL7yE7mSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9J
FTH0Jakihr4kVcTQl6SKGPqSVJGhh35ErImI70bE3ojYOOzlS1LNhhr6EXEW8AfAtcAlwAcj4pJh1iBJNRv2kf4VwN7MfCUz/xZ4EFg75BokqVqRmcNbWMQHgDWZ+a/K+G8AV2bmh9v6rAfWl9FLgeeHVmBzbwb+aq6L6MC6ejeqtVlXb6zrRP8oM8c7TZg37Eq6ycxNwCaAiPhmZq6c45JOYl29GdW6YHRrs67eWFdzwz69MwUsaRu/sLRJkoZg2KH/NLAsIi6KiLOBG4GtQ65Bkqo11NM7mXk8Ij4MPAGcBWzOzBdOMcum4VTWM+vqzajWBaNbm3X1xroaGuqFXEnS3PITuZJUEUNfkioyEqHf7asZIuKNEfFQmf5URCwdQk1LIuLJiHgxIl6IiI906DMREUci4tny+N1B11WWuy8idpdlfrPD9IiIz5ft9VxEXDaEmpa3bYdnI+L1iPjojD5D214RsTkiDkbE821t50fEtojYU34unGXedaXPnohYN+Cafi8ivlNep0ciYmyWeU/5mg+otk9ExFTb63XdLPMO7KtVZqnrobaa9kXEs7PMO5BtNls2zPX+1VhmzumD1gXdl4G3AmcD3wYumdHnXwP/uQzfCDw0hLoWA5eV4TcBf9mhrgngsTnYZvuAN59i+nXAV4AArgKemoPX9Ae0PiAyJ9sLeA9wGfB8W9t/BDaW4Y3ApzvMdz7wSvm5sAwvHGBNq4F5ZfjTnWpq8poPqLZPAL/d4LU+5e9vv+uaMf1e4HeHuc1my4a53r+aPkbhSL/JVzOsBbaU4YeBqyMiBllUZu7PzGfK8F8DLwEXDHKZfbQWeCBbdgJjEbF4iMu/Gng5M18d4jJPkJlfAw7NaG7fj7YA13eY9RpgW2YeyszDwDZgzaBqysy/yMzjZXQnrc+uDN0s26uJgX61yqnqKhlwA/An/Vpew5pmy4Y53b+aGoXQvwD4ftv4a5wcrn/Xp/yCHAH+/lCqA8rppF8Bnuow+Vcj4tsR8ZWIePuQSkrgLyJiV7S+tmKmJtt0kG5k9l/Eudhe0xZl5v4y/ANgUYc+c7ntPkTrL7ROur3mg/Lhcupp8yynK+Zye/1T4EBm7pll+sC32YxsGPX9CxiN0B9pETEf+FPgo5n5+ozJz9A6hfEO4D8B/3NIZb07My+j9W2lt0fEe4a03K6i9aG79wP/o8PkudpeJ8nW39ojc79yRPwOcBz40ixd5uI1vw/4x8A7gf20TqWMkg9y6qP8gW6zU2XDqO1f7UYh9Jt8NcPf9YmIecAC4IeDLiwi3kDrRf1SZv7ZzOmZ+XpmHi3DjwNviIg3D7quzJwqPw8Cj9D6E7vdXH7dxbXAM5l5YOaEudpebQ5Mn+YqPw926DP0bRcRtwC/BtxUwuIkDV7zvsvMA5n5s8z8f8B/nWWZc7KvlRz4l8BDs/UZ5DabJRtGcv+aaRRCv8lXM2wFpq9yfwD46my/HP1SzhfeD7yUmb8/S59/MH1tISKuoLU9B/pmFBHnRcSbpodpXQic+U2kW4Gbo+Uq4Ejbn52DNuvR11xsrxna96N1wKMd+jwBrI6IheV0xurSNhARsQb4N8D7M/Mns/Rp8poPorb260D/YpZlztVXq/wz4DuZ+VqniYPcZqfIhpHbvzoa5lXj2R607jb5S1p3AfxOabuL1i8CwDm0ThfsBb4BvHUINb2b1p9nzwHPlsd1wG8Cv1n6fBh4gdYdCzuBfzKEut5alvftsuzp7dVeV9D6ZzUvA7uBlUN6Hc+jFeIL2trmZHvReuPZD/xfWudNb6N1HWg7sAf4X8D5pe9K4Att836o7Gt7gVsHXNNeWud4p/ex6bvU/iHw+Kle8yFsry+W/ec5WoG2eGZtZfyk399B1lXa/2h6v2rrO5RtdopsmNP9q+nDr2GQpIqMwukdSdKQGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIv8fgTUi8V1pxPsAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "apps_per_device[apps_per_device<=21].hist(bins=22)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "再画一下最后25%的手机设备上的app安装数量分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:49:21.449583Z",
     "iopub.status.busy": "2021-07-24T13:49:21.449223Z",
     "iopub.status.idle": "2021-07-24T13:49:22.018207Z",
     "shell.execute_reply": "2021-07-24T13:49:22.017379Z",
     "shell.execute_reply.started": "2021-07-24T13:49:21.449552Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAD4CAYAAAD2FnFTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAATiElEQVR4nO3df6zddX3H8edbKqDUtWXoXdc23jqbLRUi0hso0SwtzFLQWJagKSGjMJYmG0t004yicfgDMphGJokije1WFb2wqqNpdawrvTH+wa+K0AJ2vUDRNiBKa90FNda998f5XDhcPrfnXnrvOefS5yM5ud/v+/v5fs/7fLj3vHq+53sOkZlIkjTSazrdgCSpOxkQkqQqA0KSVGVASJKqDAhJUtW0TjdwJKecckr29va2HPfcc89x0kknTX5Dk2Cq9m7f7WXf7TXV+96xY8fPM/ONR33AzOza26JFi3Istm/fPqZx3Wiq9m7f7WXf7TXV+wbuzwl4DvYUkySpyoCQJFUZEJKkKgNCklRlQEiSqgwISVKVASFJqjIgJElVBoQkqepVHRC9a7Z0ugVJmrJe1QEhSXrlDAhJUpUBIUmqMiAkSVUGhCSpyoCQJFUZEJKkKgNCklRlQEiSqgwISVKVASFJqjIgJElVBoQkqcqAkCRVGRCSpCoDQpJUZUBIkqoMCElSlQEhSaoaU0BExN6I2BkRP4yI+0vt5IjYGhF7ys9ZpR4RcVNEDEbEQxFxRtNxVpXxeyJi1eQ8JEnSRBjPK4ilmXl6ZvaV9TXAtsxcAGwr6wDnAwvKbTVwMzQCBbgGOAs4E7hmOFQkSd3naE4xrQA2lOUNwIVN9a9kw93AzIiYDZwHbM3MA5l5ENgKLD+K+5ckTaKxBkQC/xUROyJidan1ZOZTZflpoKcszwF+0rTvvlIbrS5J6kLTxjjuXZm5PyLeBGyNiB81b8zMjIiciIZKAK0G6OnpYWBgoOU+Q0ND1XEfPu3wmPbvpNF673b23V723V72XWTmuG7AJ4CPALuB2aU2G9hdlm8BLm4av7tsvxi4pan+knG126JFi3Istm/fXq2/+arNY9q/k0brvdvZd3vZd3tN9b6B+3Ocz+21W8tTTBFxUkS8YXgZWAbsAjYBw1cirQLuKMubgEvL1UyLgUPZOBV1J7AsImaVN6eXlZokqQuN5RRTD/DtiBge//XM/M+IuA+4PSKuAJ4EPlDGfwe4ABgEngcuB8jMAxHxaeC+Mu5TmXlgwh6JJGlCtQyIzHwceHul/ixwbqWewJWjHGs9sH78bUqS2s1PUkuSqgwISVKVASFJqjIgJElVBoQkqcqAkCRVGRCSpCoDQpJUZUBIkqoMCElSlQEhSaoyICRJVQaEJKnKgJAkVRkQkqQqA0KSVGVASJKqDAhJUpUBIUmqMiAkSVUGhCSpyoCQJFUZEJKkKgNCklRlQEiSqgwISVKVASFJqjIgJElVYw6IiDguIh6IiM1lfX5E3BMRgxFxW0QcX+onlPXBsr236RhXl/ruiDhvwh+NJGnCjOcVxAeBR5vWbwBuzMy3AgeBK0r9CuBgqd9YxhERC4GVwNuA5cAXI+K4o2tfkjRZxhQQETEXeA/w5bIewDnAxjJkA3BhWV5R1inbzy3jVwD9mfmbzHwCGATOnIDHIEmaBJGZrQdFbAT+CXgD8BHgMuDu8iqBiJgHfDczT42IXcDyzNxXtj0GnAV8ouzztVJfV/bZOOK+VgOrAXp6ehb19/e37G9oaIjp06e/rL5z/yFOmzOj5f6dNFrv3c6+28u+22uq97106dIdmdl3tMeb1mpARLwXeCYzd0TEkqO9w1Yycy2wFqCvry+XLGl9lwMDA9TGXbZmC3svab1/J43We7ez7/ay7/ay74aWAQG8E3hfRFwAnAj8HvB5YGZETMvMw8BcYH8Zvx+YB+yLiGnADODZpvqw5n0kSV2m5XsQmXl1Zs7NzF4abzLflZmX
ANuBi8qwVcAdZXlTWadsvysb57E2ASvLVU7zgQXAvRP2SCRJE2osryBGcxXQHxHXAg8A60p9HfDViBgEDtAIFTLz4Yi4HXgEOAxcmZm/O4r7lyRNonEFRGYOAANl+XEqVyFl5q+B94+y/3XAdeNtUpLUfn6SWpJUZUBIkqoMCElSlQEhSaoyICRJVQaEJKnKgJAkVRkQkqQqA0KSVGVASJKqDAhJUpUBIUmqMiAkSVUGhCSpyoCQJFUZEJKkKgNCklRlQEiSqgwISVKVASFJqjIgJElVBoQkqcqAkCRVGRCSpCoDQpJU9aoPiN41WzrdgiRNSa/6gJAkvTIGhCSpqmVARMSJEXFvRDwYEQ9HxCdLfX5E3BMRgxFxW0QcX+onlPXBsr236VhXl/ruiDhv0h6VJOmojeUVxG+AczLz7cDpwPKIWAzcANyYmW8FDgJXlPFXAAdL/cYyjohYCKwE3gYsB74YEcdN4GORJE2glgGRDUNl9bXllsA5wMZS3wBcWJZXlHXK9nMjIkq9PzN/k5lPAIPAmRPxICRJEy8ys/Wgxr/0dwBvBb4AfAa4u7xKICLmAd/NzFMjYhewPDP3lW2PAWcBnyj7fK3U15V9No64r9XAaoCenp5F/f39LfsbGhpi+vTpL6vv3H8IgNPmzGh5jE4ZrfduZ9/tZd/tNdX7Xrp06Y7M7Dva400by6DM/B1wekTMBL4N/MnR3vER7mstsBagr68vlyxZ0nKfgYEBauMuK5e47r2k9TE6ZbTeu519t5d9t5d9N4zrKqbM/AWwHTgbmBkRwwEzF9hflvcD8wDK9hnAs831yj6SpC4zlquY3lheORARrwPeDTxKIyguKsNWAXeU5U1lnbL9rmycx9oErCxXOc0HFgD3TtDjkCRNsLGcYpoNbCjvQ7wGuD0zN0fEI0B/RFwLPACsK+PXAV+NiEHgAI0rl8jMhyPiduAR4DBwZTl1JUnqQi0DIjMfAt5RqT9O5SqkzPw18P5RjnUdcN3425QktZufpJYkVRkQkqQqA0KSVGVASJKqDAhJUpUBIUmqMiAkSVUGhCSpyoCQJFUZEJKkKgNCklRlQEiSqgwISVKVASFJqjIgJElVBoQkqcqAkCRVGRCSpCoDQpJUZUBIkqoMCElSlQEhSaoyICRJVQaEJKnKgJAkVRkQkqQqA0KSVNUyICJiXkRsj4hHIuLhiPhgqZ8cEVsjYk/5OavUIyJuiojBiHgoIs5oOtaqMn5PRKyavIclSTpaY3kFcRj4cGYuBBYDV0bEQmANsC0zFwDbyjrA+cCCclsN3AyNQAGuAc4CzgSuGQ6Vyda7Zks77kaSXlVaBkRmPpWZPyjL/ws8CswBVgAbyrANwIVleQXwlWy4G5gZEbOB84CtmXkgMw8CW4HlE/lgJEkTJzJz7IMjeoHvAacCP87MmaUewMHMnBkRm4HrM/P7Zds24CpgCXBiZl5b6h8HfpWZnx1xH6tpvPKgp6dnUX9/f8u+hoaGmD59+svqO/cfemH5tDkzxvw422m03rudfbeXfbfXVO976dKlOzKz72iPN22sAyNiOvBN4EOZ+ctGJjRkZkbE2JPmCDJzLbAWoK+vL5csWdJyn4GBAWrjLms6tbT3ktbH6YTReu929t1e9t1e9t0wpquYIuK1NMLh1sz8Vin/tJw6ovx8ptT3A/Oadp9baqPVJUldaCxXMQWwDng0Mz/XtGkTMHwl0irgjqb6peVqpsXAocx8CrgTWBYRs8qb08tKTZLUhcZyiumdwF8AOyPih6X2UeB64PaIuAJ4EvhA2fYd4AJgEHgeuBwgMw9ExKeB+8q4T2XmgYl4EJKkidcyIMqbzTHK5nMr4xO4cpRjrQfWj6dBSVJn+ElqSVKVASFJqjIgJElVBoQkqcqAkCRVGRCSpCoDQpJUZUBIkqoMCElSlQEhSaoyICRJVcdMQPi/HZWk8TlmAkKSND4GhCSpyoCQJFUZEJKkKgNCklRlQEiSqgwI
SVKVASFJqjIgJElVBoQkqcqAkCRVGRCSpCoDQpJUZUBIkqoMCElSlQEhSapqGRARsT4inomIXU21kyNia0TsKT9nlXpExE0RMRgRD0XEGU37rCrj90TEqsl5OJKkiTKWVxD/BiwfUVsDbMvMBcC2sg5wPrCg3FYDN0MjUIBrgLOAM4FrhkNFktSdWgZEZn4PODCivALYUJY3ABc21b+SDXcDMyNiNnAesDUzD2TmQWArLw8dSVIXicxsPSiiF9icmaeW9V9k5syyHMDBzJwZEZuB6zPz+2XbNuAqYAlwYmZeW+ofB36VmZ+t3NdqGq8+6OnpWdTf39+yv6GhIaZPn/6y+s79h16yftqcGS2P1W6j9d7t7Lu97Lu9pnrfS5cu3ZGZfUd7vGlHe4DMzIhonTJjP95aYC1AX19fLlmypOU+AwMD1MZdtmbLS9b3XtL6WO02Wu/dzr7by77by74bXulVTD8tp44oP58p9f3AvKZxc0tttLokqUu90oDYBAxfibQKuKOpfmm5mmkxcCgznwLuBJZFxKzy5vSyUpMkdamWp5gi4hs03kM4JSL20bga6Xrg9oi4AngS+EAZ/h3gAmAQeB64HCAzD0TEp4H7yrhPZebIN74lSV2kZUBk5sWjbDq3MjaBK0c5znpg/bi6kyR1jJ+kliRVGRCSpCoDQpJUdUwFRO+aLfSO+GyEJKnumAoISdLYGRCSpCoDQpJUZUBIkqoMCElSlQEhSaoyICRJVcdkQPhZCElq7ZgMCElSawaEJKnKgJAkVRkQkqQqA0KSVGVASJKqDAhJUtUxGxB+FkKSjuyYDQhJ0pEZEJKkqmM6IDzNJEmjO6YDQpI0OgNCklRlQEiSqo75gOhds8X3IiSp4pgPiGGGhCS9VNsDIiKWR8TuiBiMiDXtvv8jMSQk6UVtDYiIOA74AnA+sBC4OCIWtrMHSdLYTGvz/Z0JDGbm4wAR0Q+sAB5pcx+jqr2K2Hv9e+hds4W917+nAx1JUme0OyDmAD9pWt8HnNU8ICJWA6vL6lBE7B7DcU8Bfj4hHVbEDS/9OcEmtfdJZN/tZd/tNdX7fvNEHKzdAdFSZq4F1o5nn4i4PzP7JqmlSTVVe7fv9rLv9rLvhna/Sb0fmNe0PrfUJEldpt0BcR+wICLmR8TxwEpgU5t7kCSNQVtPMWXm4Yj4W+BO4DhgfWY+PAGHHtcpqS4zVXu37/ay7/aybyAycyKPJ0l6lfCT1JKkKgNCklQ15QOim7+6IyLmRcT2iHgkIh6OiA+W+skRsTUi9pSfs0o9IuKm8lgeiogzOtz/cRHxQERsLuvzI+Ke0t9t5UIDIuKEsj5Ytvd2sOeZEbExIn4UEY9GxNlTYb4j4u/K78iuiPhGRJzYrfMdEesj4pmI2NVUG/ccR8SqMn5PRKzqUN+fKb8rD0XEtyNiZtO2q0vfuyPivKZ6W59zan03bftwRGREnFLWJ3a+M3PK3mi80f0Y8BbgeOBBYGGn+2rqbzZwRll+A/A/NL5i5J+BNaW+BrihLF8AfBcIYDFwT4f7/3vg68Dmsn47sLIsfwn467L8N8CXyvJK4LYO9rwB+KuyfDwws9vnm8YHSJ8AXtc0z5d163wDfwqcAexqqo1rjoGTgcfLz1lleVYH+l4GTCvLNzT1vbA8n5wAzC/PM8d14jmn1nepz6Nxwc+TwCmTMd9t/2OY4Ik7G7izaf1q4OpO93WEfu8A3g3sBmaX2mxgd1m+Bbi4afwL4zrQ61xgG3AOsLn8wv286Y/phbkvv6Rnl+VpZVx0oOcZ5Yk2RtS7er558RsGTi7ztxk4r5vnG+gd8UQ7rjkGLgZuaaq/ZFy7+h6x7c+BW8vyS55Lhue8U885tb6BjcDbgb28GBATOt9T/RRT7as75nSolyMqpwHeAdwD9GTmU2XT00BPWe6mx/MvwD8A/1fWfx/4RWYeLuvNvb3Qd9l+qIxvt/nAz4B/LafGvhwRJ9Hl852Z+4HPAj8GnqIx
fzvo/vluNt457oq5H+EvafzrG7q874hYAezPzAdHbJrQvqd6QEwJETEd+Cbwocz8ZfO2bMR5V11rHBHvBZ7JzB2d7mWcptF4KX5zZr4DeI7G6Y4XdOl8z6LxpZXzgT8ETgKWd7Spo9CNc9xKRHwMOAzc2uleWomI1wMfBf5xsu9rqgdE1391R0S8lkY43JqZ3yrln0bE7LJ9NvBMqXfL43kn8L6I2Av00zjN9HlgZkQMf7iyubcX+i7bZwDPtrPhYh+wLzPvKesbaQRGt8/3nwFPZObPMvO3wLdo/Dfo9vluNt457pa5JyIuA94LXFLCDbq77z+i8Y+JB8vf6FzgBxHxB0fo7xX1PdUDoqu/uiMiAlgHPJqZn2vatAkYvopgFY33Jobrl5YrERYDh5petrdNZl6dmXMzs5fGnN6VmZcA24GLRul7+PFcVMa3/V+Qmfk08JOI+ONSOpfGV8l39XzTOLW0OCJeX35nhvvu6vkeYbxzfCewLCJmlVdQy0qtrSJiOY1Tqe/LzOebNm0CVpYrxuYDC4B76YLnnMzcmZlvysze8je6j8bFME8z0fM92W+utOHNmwtoXB30GPCxTvczord30Xip/RDww3K7gMb54m3AHuC/gZPL+KDxP1R6DNgJ9HXBY1jCi1cxvYXGH8kg8O/ACaV+YlkfLNvf0sF+TwfuL3P+HzSu2Oj6+QY+CfwI2AV8lcbVM10538A3aLxX8tvy5HTFK5ljGuf8B8vt8g71PUjj3Pzw3+eXmsZ/rPS9Gzi/qd7W55xa3yO27+XFN6kndL79qg1JUtVUP8UkSZokBoQkqcqAkCRVGRCSpCoDQpJUZUBIkqoMCElS1f8DJ/b4xFHNAmkAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "apps_per_device[apps_per_device>21].hist(bins=250)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 手机设备上app分类情况\n",
    "\n",
    "* 75% of devices have at most 45 categories or 16 general categories\n",
    "* Max categories per device (the outliers) has 332 categories or 22 general categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:49:56.798610Z",
     "iopub.status.busy": "2021-07-24T13:49:56.798253Z",
     "iopub.status.idle": "2021-07-24T13:50:03.748584Z",
     "shell.execute_reply": "2021-07-24T13:50:03.747614Z",
     "shell.execute_reply.started": "2021-07-24T13:49:56.798578Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Devices records with all events: (12732996, 8)\n",
      "Devices with unique apps: (910458, 8)\n"
     ]
    }
   ],
   "source": [
    "print(\"Devices records with all events:\", active_apps.shape)\n",
    "#Get the first device app\n",
    "cat_devices = active_apps.groupby(['device_id', 'app_id']).first().reset_index()\n",
    "print(\"Devices with unique apps:\", cat_devices.shape)\n",
    "cat_devices = cat_devices.merge(apps, how='left', on='app_id').merge(labels, how='left', on='label_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:50:03.750373Z",
     "iopub.status.busy": "2021-07-24T13:50:03.750092Z",
     "iopub.status.idle": "2021-07-24T13:50:04.389176Z",
     "shell.execute_reply": "2021-07-24T13:50:04.388360Z",
     "shell.execute_reply.started": "2021-07-24T13:50:03.750344Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>device_id</th>\n",
       "      <th>app_id</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-9222956879900151005</td>\n",
       "      <td>-8670045756254444686</td>\n",
       "      <td>financial</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-9222956879900151005</td>\n",
       "      <td>-8670045756254444686</td>\n",
       "      <td>P2P</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-9222956879900151005</td>\n",
       "      <td>-8670045756254444686</td>\n",
       "      <td>Industry tag</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             device_id               app_id      category\n",
       "0 -9222956879900151005 -8670045756254444686     financial\n",
       "1 -9222956879900151005 -8670045756254444686           P2P\n",
       "2 -9222956879900151005 -8670045756254444686  Industry tag"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cat_devices[['device_id', 'app_id', 'category']].head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:50:04.391202Z",
     "iopub.status.busy": "2021-07-24T13:50:04.390813Z",
     "iopub.status.idle": "2021-07-24T13:50:05.760362Z",
     "shell.execute_reply": "2021-07-24T13:50:05.759491Z",
     "shell.execute_reply.started": "2021-07-24T13:50:04.391160Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "g_by_device = cat_devices.groupby(['device_id'])\n",
    "device_categories = g_by_device.category.nunique().to_frame().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:50:09.318014Z",
     "iopub.status.busy": "2021-07-24T13:50:09.317670Z",
     "iopub.status.idle": "2021-07-24T13:50:09.328014Z",
     "shell.execute_reply": "2021-07-24T13:50:09.327148Z",
     "shell.execute_reply.started": "2021-07-24T13:50:09.317982Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>device_id</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-9222956879900151005</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-9222661944218806987</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-9222399302879214035</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-9221825537663503111</td>\n",
       "      <td>46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-9221767098072603291</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             device_id  category\n",
       "0 -9222956879900151005        52\n",
       "1 -9222661944218806987        31\n",
       "2 -9222399302879214035        25\n",
       "3 -9221825537663503111        46\n",
       "4 -9221767098072603291        42"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "device_categories.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-24T13:50:12.349856Z",
     "iopub.status.busy": "2021-07-24T13:50:12.349515Z",
     "iopub.status.idle": "2021-07-24T13:50:12.371144Z",
     "shell.execute_reply": "2021-07-24T13:50:12.370153Z",
     "shell.execute_reply.started": "2021-07-24T13:50:12.349821Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>device_id</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6.066900e+04</td>\n",
       "      <td>60669.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>-1.655896e+16</td>\n",
       "      <td>31.354794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>5.333714e+18</td>\n",
       "      <td>20.745542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-9.222957e+18</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-4.668856e+18</td>\n",
       "      <td>15.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>-1.471330e+16</td>\n",
       "      <td>27.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>4.609620e+18</td>\n",
       "      <td>45.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>9.222540e+18</td>\n",
       "      <td>332.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          device_id      category\n",
       "count  6.066900e+04  60669.000000\n",
       "mean  -1.655896e+16     31.354794\n",
       "std    5.333714e+18     20.745542\n",
       "min   -9.222957e+18      1.000000\n",
       "25%   -4.668856e+18     15.000000\n",
       "50%   -1.471330e+16     27.000000\n",
       "75%    4.609620e+18     45.000000\n",
       "max    9.222540e+18    332.000000"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "device_categories.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
